
# Usage:
# Rscript apaCount_fixed.R long3utr.txt 19 myfile.sorted.bam
# or R CMD BATCH "--args long3utr.txt 19 myfile.sorted.bam" apaCount_fixed.R &
# Compute the tag counts in each block.
# Command-line arguments (trailing arguments only):
#   args[1]  tab-separated region table without a header row
#   args[2]  number of autosomes; used further down to build the list of
#            chromosomes to keep (chr1..chrN plus chrX)
#   args[3]  BAM file whose alignments are counted
args <- commandArgs(TRUE)
if (length(args) < 3) {
  # Fail with a usage hint instead of an obscure read.table() error.
  stop(
    "Usage: Rscript apaCount_fixed.R <regions.txt> <n_autosomes> <file.bam>",
    call. = FALSE
  )
}

# The region table has no header line, so column names are assigned below.
dd <- read.table(args[1], header = FALSE, sep = "\t")
if (ncol(dd) != 5) {
  stop(
    "Expected 5 tab-separated columns (geneid, strand, chr, start, end) in ",
    args[1], " but found ", ncol(dd),
    call. = FALSE
  )
}

# Column order is geneid, strand, chr, start, end. Note that strand comes
# BEFORE chr here (an earlier layout of the input had chr before strand).
colnames(dd) <- c("geneid", "strand", "chr", "start", "end")

# Convert the region table `dd` into a strand-aware GRanges object and keep
# only the chromosomes of interest.

# library() stops immediately when the package is missing, whereas require()
# would only return FALSE and let the script fail later with a confusing error.
library(GenomicRanges)

# data.frame2GRanges() comes from a helper script; the relative path means it
# must be present in the current working directory.
source("data.frame2GRanges.R")

grs <- data.frame2GRanges(dd, keepColumns = TRUE, ignoreStrand = FALSE)

# args[2] is the number of autosomes (e.g. 19 for mouse, 22 for human).
# Convert it explicitly rather than relying on `1:"19"` coercing a string.
n_autosomes <- suppressWarnings(as.integer(args[2]))
if (is.na(n_autosomes) || n_autosomes < 1L) {
  stop(
    "The autosome count (second argument) must be a positive integer, got: ",
    args[2],
    call. = FALSE
  )
}

# Keep chr1..chrN and chrX. chrY is not kept (earlier variants of this line
# also listed "Y"; add it to the vector below if it is needed again).
grs <- keepSeqlevels(grs, paste0("chr", c(seq_len(n_autosomes), "X")))

# Name the metadata column "geneid".
# NOTE(review): this assumes keepColumns = TRUE leaves exactly one metadata
# column (the gene id) - confirm against data.frame2GRanges.R.
names(elementMetadata(grs)) <- "geneid"

# Count, for every region in `grs`, the alignments from the BAM file
# (args[3]) whose 1-bp anchor position overlaps that region.

# library() stops immediately when the package is missing, whereas require()
# would only return FALSE and let the script fail later with a confusing error.
library(Rsamtools)

# NOTE(review): readBamGappedAlignments() belongs to older Bioconductor
# releases; newer ones appear to provide readGAlignments() in the
# GenomicAlignments package instead - confirm against the installed version.
n1 <- readBamGappedAlignments(args[3])

# Shrink every alignment to a single-base range.
# NOTE(review): with resize()'s default anchoring this should be the
# strand-aware start of each alignment - confirm this is the intended end.
n1 <- resize(granges(n1), 1)

# Drop the strand of the reads so that overlaps are counted regardless of
# which strand a read maps to.
strand(n1) <- "*"

# One count per region, labelled with the region's gene id.
cts <- countOverlaps(grs, n1)
names(cts) <- elementMetadata(grs)$geneid

# Save the named count vector `cts` next to the working directory as
# <bam base name>.cts.rda (e.g. myfile.sorted.bam -> myfile.sorted.cts.rda).
#
# The ".bam" suffix is removed with an anchored, escaped pattern. The previous
# gsub('.bam', ...) treated "." as "any character" and replaced every match,
# so a name such as "mybam.sorted.bam" was mangled. Appending the new suffix
# unconditionally also guarantees the output name always differs from the
# input name, so the BAM file can never be overwritten by save().
bam_stem <- sub("\\.bam$", "", basename(args[3]))
outf <- paste0(bam_stem, ".cts.rda")
save(cts, file = outf)
