
# Usage:
# Rscript apa3utr_fixed.R txdb.mm9.refGene.sqlite 19 long3utr.txt
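# or: R CMD BATCH "--args txdb.mm9.refGene.sqlite 19 long3utr.txt" apa3utr_fixed.R &
#
# Arguments: <transcript db sqlite file> <highest autosome number> <output file>
# Selects non-redundant 3' UTRs with length > 600 and appends them, split into
# chunks, to the output file.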
# ---- Command line -----------------------------------------------------------
# args[1]: transcript database (sqlite) file
# args[2]: highest autosome number to keep (e.g. 19 for mouse, 22 for human)
# args[3]: output file (used further down when the chunks are written)
args <- commandArgs(TRUE)
if (length(args) < 3) {
  stop("Usage: Rscript apa3utr_fixed.R <txdb.sqlite> <n_autosomes> <output.txt>",
       call. = FALSE)
}
n_autosomes <- suppressWarnings(as.integer(args[2]))
if (is.na(n_autosomes) || n_autosomes < 1L) {
  stop("Second argument must be a positive integer (number of autosomes): ",
       args[2], call. = FALSE)
}

# Only 3' UTRs strictly longer than this are kept.
minlen <- 600

library(GenomicFeatures)
# NOTE(review): loadFeatures() is the legacy GenomicFeatures loader; newer
# Bioconductor releases provide AnnotationDbi::loadDb() instead -- confirm
# against the installed version before changing.
txdb <- loadFeatures(args[1])

# 3' UTR ranges grouped by transcript, restricted to chr1..chrN and chrX
# (chrY is not kept).
utr3_by_tx <- threeUTRsByTranscript(txdb)
kept_chroms <- paste("chr", c(seq_len(n_autosomes), "X"), sep = "")
utr3_by_tx <- keepSeqlevels(utr3_by_tx, kept_chroms)

# Keep only the last range of every transcript. This step is slow. It relies
# on the ranges inside each transcript already being ordered (original
# author's note: check e.g. the entry named '521').
last_utr <- endoapply(utr3_by_tx, function(it) tail(it, 1))
last_utr <- unlist(last_utr, use.names = FALSE)
elementMetadata(last_utr) <- NULL
# The names below are only valid if every transcript contributed exactly one
# range; fail loudly instead of silently mislabelling ranges.
stopifnot(length(last_utr) == length(utr3_by_tx))
names(last_utr) <- names(utr3_by_tx)

# Length filter.
long_utr <- last_utr[width(last_utr) > minlen]

# Drop redundant UTRs. Two UTRs are the same locus only if chromosome, start,
# end and strand all agree; comparing start/end alone would also discard
# unrelated UTRs that merely share coordinates on different chromosomes or
# strands.
locus_key <- paste(as.character(seqnames(long_utr)),
                   start(long_utr),
                   end(long_utr),
                   as.character(strand(long_utr)),
                   sep = ":")
long3utr <- long_utr[!duplicated(locus_key)]

# Replace internal transcript ids by transcript names and keep the first UTR
# seen for each name.
tx_names <- id2name(txdb, feature.type = "tx")[names(long3utr)]
names(long3utr) <- tx_names
long3utr <- long3utr[!duplicated(tx_names)]

# Split every selected 3' UTR into consecutive chunks and append one
# tab-separated row per chunk to the output file (args[3]), without header:
#   transcript name, strand, chromosome, chunk start, chunk end
#
# Chunk size depends on the UTR length:
#   < 2000 -> 100, < 4000 -> 200, < 8000 -> 400, otherwise 800.
# For "+" strand UTRs chunks are written in increasing coordinate order; for
# every other strand value the order is reversed.

# Pull the per-UTR attributes out once instead of subsetting the GRanges
# object several times per iteration.
utr_name   <- names(long3utr)
utr_chr    <- as.character(seqnames(long3utr))
utr_strand <- as.character(strand(long3utr))
utr_start  <- start(long3utr)
utr_width  <- width(long3utr)

# findInterval() returns 0..3 for the length classes listed above.
utr_chunk_size <- c(100, 200, 400, 800)[
  findInterval(utr_width, c(2000, 4000, 8000)) + 1L
]

# Open the output once in append mode (existing content is kept, as before).
out_con <- file(args[3], open = "a")
tryCatch({
  for (k in seq_along(long3utr)) {
    # Name the argument: depending on the IRanges version the second
    # positional parameter of breakInChunks() is the chunk count, not the
    # chunk size.
    pieces <- breakInChunks(totalsize = utr_width[k],
                            chunksize = utr_chunk_size[k])

    # breakInChunks() ranges are relative and start at 1, so the genomic
    # offset is start - 1; this makes the chunks cover exactly
    # [start, end] of the UTR rather than [start + 1, end + 1].
    offset <- utr_start[k] - 1L
    piece_start <- start(pieces) + offset
    piece_end   <- end(pieces) + offset

    if (utr_strand[k] != "+") {
      piece_start <- rev(piece_start)
      piece_end   <- rev(piece_end)
    }

    rows <- data.frame(name   = utr_name[k],
                       strand = utr_strand[k],
                       chr    = utr_chr[k],
                       start  = piece_start,
                       end    = piece_end,
                       stringsAsFactors = FALSE)

    write.table(rows, out_con, row.names = FALSE, sep = "\t",
                quote = FALSE, col.names = FALSE)
  }
}, finally = close(out_con))
