IUTA <-
function(bam.list.1,bam.list.2,transcript.info,rep.info.1=rep(1,length(bam.list.1)),rep.info.2=rep(1,length(bam.list.2)),
		output.dir=paste(getwd(),"/IUTA",sep=""),output.na=FALSE,
		genes.interested="all",
		strand.specific=rep("1.5",length(rep.info.1)+length(rep.info.2)),
                gene.filter.chr=c("_","M","Un"),mapq.cutoff=NA,alignment.per.kb.cutoff=10,IU.for.NA.estimate="even",
		sample.FLD=FALSE,FLD="empirical",mean.FL.normal=NA,sd.FL.normal=NA,number.samples.EFLD=1E6,
                isoform.weight.cutoff=1E-4,adjust.weight=1E-4,epsilon=1E-5,
                test.type="SKK",log.p=FALSE,fwer=1E-2,
		mc.cores.user=NA){

	max.cores<-detectCores()
	if (is.na(mc.cores.user)){
		mc.cores<-max.cores
		cat(paste("All ",max.cores," cores on the machine will be used.",sep=""),"\n")
	}
	else if (mc.cores.user>max.cores){
		mc.cores<-max.cores
		cat(paste("Warning: User specified more cores than the machine has. All ",max.cores," cores on the machine will be used.",sep=""),"\n")	
	}
	else{
		mc.cores<-mc.cores.user
	}
	ptm<-proc.time()

        bam.list<-c(bam.list.1,bam.list.2)
        rep.info<-c(rep.info.1,rep.info.2)
        bam.group.list<-split(bam.list,rep(1:length(rep.info),rep.info))

	cat("Preparing indices of bam files ......","\n")
	count.indices<-0
	for (bam.file in bam.list){
		if (!file.exists(paste(bam.file,".bai",sep=""))){
		        indexBam(bam.file)
		        count.indices<-count.indices+1
		}
	}
	cat(paste(count.indices," bam file(s) were indexed.",sep=""),"\n")

	if (FLD=="empirical" & !(is.na(mean.FL.normal) & is.na(sd.FL.normal))){
		cat("Warning: No inputted mean/sd(standard deviation) are allowed for Empirical Fragment Length Distribution (EFLD)!","\n")
		cat("Warning: The inputted mean/sd are ignored!","\n")
	}

	cat("Filtering genes according to the input annotation ...","\n")
	transcript.gtf<-read.delim(transcript.info,stringsAsFactors=FALSE,header=FALSE)
	transcript.gtf<-transcript.gtf[transcript.gtf[,3]=="exon",]
	gtf.9<-strsplit(transcript.gtf[,9]," |;")
	order.gene.id<-match("gene_id",gtf.9[[1]])+1
	gene.ref<-unlist(lapply(gtf.9,"[",order.gene.id))
	order.trans.id<-match("transcript_id",gtf.9[[1]])+1
	transcript<-unlist(lapply(gtf.9,"[",order.trans.id))
	transcript.gtf.1.7.unique<-unique(cbind(transcript.gtf[,c(1,7)],gene.ref))
	if (all(is.na(gene.filter.chr))){
		temp.rec<-by(transcript.gtf.1.7.unique,as.factor(transcript.gtf.1.7.unique[,3]),function(transcript.gtf.i){ifelse(nrow(transcript.gtf.i)==1,TRUE,FALSE)})
	}
	else {
		temp.rec<-by(transcript.gtf.1.7.unique,as.factor(transcript.gtf.1.7.unique[,3]),function(transcript.gtf.i){ifelse(nrow(transcript.gtf.i)==1 & !grepl(paste(gene.filter.chr,collapse="|"),transcript.gtf.i[1,1]),TRUE,FALSE)})
	}
	keep.genes<-names(temp.rec)[temp.rec]
	temp.index.annotation<-(gene.ref %in% keep.genes)

	transcript.gtf<-transcript.gtf[temp.index.annotation,]
	transcript<-transcript[temp.index.annotation]
	gene.ref<-gene.ref[temp.index.annotation]
	gene.transcript.ref<-unique(cbind(transcript,gene.ref))
	n.transcript<-by(gene.transcript.ref,as.factor(gene.transcript.ref[,2]),nrow)
	candidate.genes<-(names(n.transcript))[n.transcript>1]
	if (genes.interested=="all"){
		gene.pool<-candidate.genes
		cat(paste("all ",length(gene.pool)," genes will be analyzed.",sep=""),"\n")
	}
	else{
		gene.pool<-genes.interested[genes.interested %in% candidate.genes]
		cat(paste(length(gene.pool)," genes will be analyzed, ",length(genes.interested)-length(gene.pool)," genes are excluded because of annotation.",sep=""),"\n")
	}	

	gr.c<-BiocGenerics::unique(GRanges(seqnames=transcript.gtf[,1],ranges=IRanges(transcript.gtf[,4],transcript.gtf[,5]),strand<-transcript.gtf[,7]))
	gr.c<-gr.c[countOverlaps(gr.c,gr.c,ignore.strand=TRUE)<=1L]
	gr.c<-gr.c[order(width(gr.c),decreasing=TRUE)]

	strand.gr.c<-as.character(strand(gr.c))
	strand.gene.pool<-transcript.gtf[match(gene.pool,gene.ref),7]

        transcript.se.rec<-by(transcript.gtf[,4:5],as.factor(transcript),function(i)sort(c(as.matrix(i))))
        transcript.se.rec<-transcript.se.rec[names(transcript.se.rec) %in% gene.transcript.ref[gene.transcript.ref[,2] %in% gene.pool,1]]

	theta.est.list.all<-rep(0,length(rep.info))
	for (bam.group.idx in 1:length(rep.info)){
		pos.strand.flag<-switch(strand.specific[bam.group.idx],"1"=c(97,145,99,147,65,129,67,131),"2"=c(81,161,83,163,65,129,67,131),"1.5"=c(97,145,99,147,65,129,67,131,81,161,83,163,113,177,115,179))
		neg.strand.flag<-switch(strand.specific[bam.group.idx],"2"=c(97,145,99,147,113,177,115,179),"1"=c(81,161,83,163,113,177,115,179),"1.5"=c(97,145,99,147,65,129,67,131,81,161,83,163,113,177,115,179))
		good.flags.c<-unname(list("+"=pos.strand.flag,"-"=neg.strand.flag)[strand.gr.c])
		good.flags.p<-unname(list("+"=pos.strand.flag,"-"=neg.strand.flag)[strand.gene.pool])

		bam.group<-bam.group.list[[bam.group.idx]]

		cat(paste("Analyzing the sample with ",length(bam.group)," technical replicates with first replicate: ",substr(basename(bam.group[1]),1,nchar(basename(bam.group[1]))-4)," ......",sep=""),"\n")
		if (sample.FLD==TRUE | (bam.group.idx==1) | (bam.group.idx==(length(rep.info.1)+1))){
			cat("Estimating Empirical Fragment Length Distribution (EFLD) ......","\n")

			what<-c("qname","flag","pos","mapq","cigar")
			frag.len.rec<-NULL; count.sample<-0; chunk.start<-0;
			while (count.sample<number.samples.EFLD & chunk.start<length(gr.c)){
				gr.c.chunk<-gr.c[(chunk.start+1):min(length(gr.c),chunk.start+1000)]
				names(gr.c.chunk)<-1:length(gr.c.chunk)
				param<-ScanBamParam(which=gr.c.chunk,what=what)

				names.bam.out<-unname(unlist(BiocGenerics::sapply(bamWhich(param),names)))
				for (bam.file in bam.group){
					bam.gene<-scanBam(bam.file,param=param)
					frag.len.rec<-c(frag.len.rec,unlist(mclapply(1:length(gr.c.chunk),function(i){
							bam.gene.i<-bam.gene[[match(as.character(i),names.bam.out)]]
							if (length(bam.gene.i$qname)==0){
								index<-NULL
							}
							else{
								index<-filter.reads(bam.gene.i,good.flags.c[[chunk.start+i]],mapq.cutoff)
							}
							if (length(index)>0){
								order.qname<-order(bam.gene.i$qname[index],bam.gene.i$pos[index])
								return(.C("extract_frag_len",
									as.integer((bam.gene.i$pos[index])[order.qname]),as.character((bam.gene.i$cigar[index])[order.qname]),
									as.integer(start(gr.c.chunk)[i]),as.integer(end(gr.c.chunk)[i]),as.integer(length(index)/2),
									as.integer(1),c.length=integer(length(index)/2))[["c.length"]])
							}},mc.cores=mc.cores)))
					frag.len.rec<-frag.len.rec[frag.len.rec!=0]
					rm(bam.gene); gc();
				}	
				count.sample<-length(frag.len.rec); chunk.start<-chunk.start+1000;
			}
	
			cat(paste("Number of samples used for EFLD = ",length(frag.len.rec),sep=""),"\n")
			cat(paste("Mean of EFLD (before smoothing) = ",round(mean(frag.len.rec),1),"; SD of EFLD (before smoothing) = ",round(sd(frag.len.rec),1),sep=""),"\n")

			if (FLD=="empirical"){
				min.FLR<-min(frag.len.rec)
				max.FLR<-max(frag.len.rec)
				frag.len.dist<-smooth.FLD(frag.len.rec,width=5)
			}
			else{
				mean.len.dist<-ifelse(is.na(mean.FL.normal),mean(frag.len.rec),mean.FL.normal)
				sd.len.dist<-ifelse(is.na(sd.FL.normal),sd(frag.len.rec),sd.FL.normal)
				cat("Warning: Please consider using EFLD estimates for Fragment Length Distribution if they are much different from the user specified ones!","\n")
			}
		}
	
		theta.est.list<-mclapply(1:length(gene.pool),function(i){
			gene.symbol<-gene.pool[i]

			isoform.id<-sort(unique(transcript[gene.ref==gene.symbol]))
			n.isoform<-length(isoform.id)

			start.end<-unname(transcript.se.rec[match(isoform.id,names(transcript.se.rec))])
			start<-lapply(start.end,"[",c(T,F))
			end<-lapply(start.end,"[",c(F,T))
			l.isoform<-unlist(lapply(start.end,function(i){sum(diff(i)[c(T,F)]+1)}))
			
			temp.pts<-c()
        		for (j in 1:n.isoform){
                		for (k in 1:length(start[[j]])){
                        		temp.pts<-unique(c(temp.pts,start[[j]][k]:end[[j]][k]))
                		}
        		}
			alignment.gene.cutoff<-alignment.per.kb.cutoff*length(temp.pts)/1000

			what<-c("qname","flag","pos","mapq","cigar")
			which<-GRanges(seqnames=unique(transcript.gtf[transcript %in% isoform.id,1]),ranges=IRanges(min(unlist(start)),max(unlist(end))))
			param<-ScanBamParam(which=which,what=what)

			read.matrix<-NULL
			flag.no.reads<-0
			for (bam.file in bam.group){
				bam.gene<-scanBam(bam.file,param=param)[[1]]

				if (length(bam.gene$qname)==0){
					index<-integer(0)
				}
				else{
					index<-filter.reads(bam.gene,good.flags.p[[i]],mapq.cutoff)
				}
				if (length(index)>0){
					order.qname<-order(bam.gene$qname[index],bam.gene$pos[index])
					read.matrix<-rbind(read.matrix,matrix(unlist(lapply(1:n.isoform,function(i){
										return(.C("extract_frag_len",
												as.integer((bam.gene$pos[index])[order.qname]),as.character((bam.gene$cigar[index])[order.qname]),
												as.integer(start[[i]]),as.integer(end[[i]]),as.integer(length(index)/2),as.integer(length(start[[i]])),
												c.length=integer(length(index)/2))[["c.length"]])})),ncol=n.isoform))
					flag.no.reads<-1
				}
				rm(bam.gene); gc();
			}

			if (flag.no.reads==1){						
				if (sum(rowSums(abs(read.matrix))>0)>0){
					read.matrix<-read.matrix[rowSums(abs(read.matrix))>0,,drop=FALSE]

					N.old<-nrow(read.matrix)
					if (FLD=="normal"){
						weight<-apply(read.matrix,1:2,function(x){ifelse(x==0,0,pnorm(x+1,mean.len.dist,sd.len.dist)-pnorm(x,mean.len.dist,sd.len.dist))})/(matrix(rep(l.isoform,N.old),nrow=N.old,byrow=TRUE)-read.matrix+matrix(rep(1,n.isoform*N.old),nrow=N.old))
					}
					else{
						weight<-apply(read.matrix,1:2,function(x){ifelse(x<min.FLR | x>max.FLR,0,frag.len.dist[x-min.FLR+1])})/(matrix(rep(l.isoform,N.old),nrow=N.old,byrow=TRUE)-read.matrix+matrix(rep(1,n.isoform*N.old),nrow=N.old))
					}
					temp.index<-(rowSums(weight)>0)
					if (sum(temp.index)>alignment.gene.cutoff){
						weight<-weight[temp.index,,drop=FALSE]
						theta.est<-.C("EM_model_C",as.integer(nrow(weight)),as.integer(ncol(weight)),as.double(c(weight)),as.double(epsilon),c.usage=double(ncol(weight)))[["c.usage"]]
						rm(read.matrix,weight); gc();
						return(theta.est/(l.isoform*sum(theta.est/l.isoform)))
					}
					else{
						rm(read.matrix,weight); gc();
						return(-1)
					}
				}
				else{
					rm(read.matrix); gc();
					return(-2)
				}
			}
			else{
				return(-3)
			}
		},mc.cores=mc.cores)

		index.gene.NRAF<-sapply(theta.est.list,function(i){i[1]==-3})
		index.gene.NDFA<-sapply(theta.est.list,function(i){i[1]==-2})
		index.gene.NDFE<-sapply(theta.est.list,function(i){i[1]==-1})
		theta.est.list[index.gene.NRAF | index.gene.NDFA | index.gene.NDFE]<-NA


		cat(paste("Number of genes with no reads after filtering: ",sum(index.gene.NRAF),sep=""),"\n")
		cat(paste("Number of genes with no data fits annotation: ",sum(index.gene.NDFA),sep=""),"\n")
		cat(paste("Number of genes with no enough data fits FLD: ",sum(index.gene.NDFE),sep=""),"\n")
		cat(paste("Number of genes with isoform usages estimated: ",length(gene.pool)-sum(index.gene.NRAF)-sum(index.gene.NDFA)-sum(index.gene.NDFE),sep=""),"\n")

		theta.est.list.all[bam.group.idx]<-list(theta.est.list)

		cat("Done!","\n")
	}

	cat("Testing for differential isoform usages ......","\n")
	all.info<-mclapply(gene.pool,function(gene.symbol){
		n.isoform<-n.transcript[match(gene.symbol,names(n.transcript))]
	
		n.gp1<-length(rep.info.1)
		n.gp2<-length(rep.info.2)

		theta.gp1<-mat.or.vec(n.gp1,n.isoform)
		for (i in 1:n.gp1){
			theta.gp1[i,]<-theta.est.list.all[[i]][[match(gene.symbol,gene.pool)]]
		}	
		theta.gp2<-mat.or.vec(n.gp2,n.isoform)
		for (i in 1:n.gp2){
			theta.gp2[i,]<-theta.est.list.all[[n.gp1+i]][[match(gene.symbol,gene.pool)]]
		}	

		theta.gp1<-theta.gp1[complete.cases(theta.gp1),,drop=FALSE]
		theta.gp2<-theta.gp2[complete.cases(theta.gp2),,drop=FALSE]

		sample.gp1<-nrow(theta.gp1)
		sample.gp2<-nrow(theta.gp2)
		sample.size<-paste(sample.gp1,",",sample.gp2,sep="")

		if (sample.gp1>1 & sample.gp2>1){
			index.valid.col<-which(apply(rbind(theta.gp1,theta.gp2),2,max)>isoform.weight.cutoff)		

			if (length(index.valid.col)==0){
				p.value<-rep(NA,length(test.type))
			}					
			else if (length(index.valid.col)==1){
				p.value<-rep(ifelse(log.p,0,1),length(test.type))
			}
			else{
				theta.gp1<-theta.gp1[,index.valid.col,drop=FALSE]
				theta.gp2<-theta.gp2[,index.valid.col,drop=FALSE]		

				theta.gp1<-apply(theta.gp1,1:2,function(i){return(ifelse(i<adjust.weight,adjust.weight,i))})
				theta.gp2<-apply(theta.gp2,1:2,function(i){return(ifelse(i<adjust.weight,adjust.weight,i))})

				ilr.theta.gp1<-ilr(theta.gp1)
				ilr.theta.gp2<-ilr(theta.gp2)
				if (n.gp1!=sample.gp1){
					if (IU.for.NA.estimate=="even"){
						temp.matrix.gp1<-mat.or.vec(n.gp1-sample.gp1,length(index.valid.col)-1)
					}
					else if (IU.for.NA.estimate=="average"){
						temp.matrix.gp1<-matrix(rep(colMeans(ilr.theta.gp1),n.gp1-sample.gp1),nrow=(n.gp1-sample.gp1),byrow=TRUE)
					}
					else {
						temp.matrix.gp1<-NULL
					}
					ilr.theta.gp1<-rbind(ilr.theta.gp1,temp.matrix.gp1)
				}
				if (n.gp2!=sample.gp2){
					if (IU.for.NA.estimate=="even"){
                                                temp.matrix.gp2<-mat.or.vec(n.gp2-sample.gp2,length(index.valid.col)-1)
                                        }
                                        else if (IU.for.NA.estimate=="average"){
                                                temp.matrix.gp2<-matrix(rep(colMeans(ilr.theta.gp2),n.gp2-sample.gp2),nrow=(n.gp2-sample.gp2),byrow=TRUE)
                                        }
                                        else {
                                                temp.matrix.gp2<-NULL
                                        }
					ilr.theta.gp2<-rbind(ilr.theta.gp2,temp.matrix.gp2)
				}

				if (length(index.valid.col)==2){
					p.value<-sapply(test.type,function(i){switch(i,
                                                                        "CQ"=CQ_BF_u(ilr.theta.gp1,ilr.theta.gp2,log.p=log.p),
                                                                        "SKK"=SKK_BF_u(ilr.theta.gp1,ilr.theta.gp2,log.p=log.p),
                                                                        "KY"=ifelse(min(n.gp1,n.gp2)<=(length(index.valid.col)-1),NA,KY_BF_u(ilr.theta.gp1,ilr.theta.gp2,log.p=log.p)))})

				}
				else{
					p.value<-sapply(test.type,function(i){switch(i,
									"CQ"=CQ_BF(ilr.theta.gp1,ilr.theta.gp2,log.p=log.p),
									"SKK"=SKK_BF(ilr.theta.gp1,ilr.theta.gp2,log.p=log.p),
									"KY"=ifelse(min(n.gp1,n.gp2)<=(length(index.valid.col)-1),NA,KY_BF(ilr.theta.gp1,ilr.theta.gp2,log.p=log.p)))})
				}
			}
		}
		else{
			p.value<-rep(NA,length(test.type))
		}

		return(list("p.value"=p.value,"sample.size"=sample.size,"n.isoform"=n.isoform))
	},mc.cores=mc.cores)
	cat("Done!","\n")

	cat("outputing results......","\n")
	
	if (file.exists(output.dir)){
		cat("output directory already exists!","\n")
	}
	else{
		dir.create(output.dir,recursive=TRUE)
		cat("output direcoty has been created!","\n")
	}

	cat("writing test results into a text file ......","\n")

	p.value<-data.frame(matrix(unlist(lapply(all.info,function(x){x[["p.value"]]})),byrow=T,ncol=length(test.type)))
	sample.size<-unlist(lapply(all.info,function(x){x[["sample.size"]]}))
	n.isoform<-unlist(lapply(all.info,function(x){x[["n.isoform"]]}))

	p.value.1<-ifelse(is.na(p.value[,1]),"Notest",test.type[1])	
	p.value.2<-p.value[,1]
	order.output<-order(p.value.2);
	if (log.p){
		significant<-ifelse(p.value.2<(log(fwer)-log(sum(!is.na(p.value.2)))),"yes","no")
	}
	else{
		significant<-ifelse(p.value.2<(fwer/sum(!is.na(p.value.2))),"yes","no")
	}
	test.result<-data.frame(gene.pool,n.isoform,sample.size,p.value.1,p.value,significant,stringsAsFactors=FALSE)[order.output,]
	if (output.na){
		write.table(test.result,paste(output.dir,"/p_values.txt",sep=""),sep="\t",quote=FALSE,row.names=FALSE,col.names=c("gene","number_of_isoform","test_sample_size","test","p_value",test.type[-1],"significant"))
	}
	else{
		write.table(test.result[rowSums(is.na(test.result[,5:(4+length(test.type)),drop=FALSE]))!=(ncol(test.result)-5),,drop=FALSE],paste(output.dir,"/p_values.txt",sep=""),sep="\t",quote=FALSE,row.names=FALSE,col.names=c("gene","number_of_isoform","test_sample_size","test","p_value",test.type[-1],"significant"))
	}
	cat("Done!","\n")

	cat("Writing isoform usage estimates into a text file......","\n")

	estimates.file<-file(paste(output.dir,"/estimates.txt",sep=""), "w")
	estimates.comments<-paste("# ",length(gene.pool)," genes are analyzed; ",length(rep.info.1),"+",length(rep.info.2)," samples are used;\n",sep="")
	estimates.comments<-paste(estimates.comments,"# A ",FLD,ifelse(FLD=="empirical"," (after smoothing) ","")," fragment length distribution is used in analysis\n",sep="" )
	estimates.comments.header<-paste(estimates.comments,paste("gene","isoform",paste(substr(basename(sapply(bam.group.list,"[",1)),1,nchar(basename(sapply(bam.group.list,"[",1)))-4),collapse="\t"),sep="\t"),sep="")
	writeLines(estimates.comments.header,estimates.file)
	close(estimates.file)

	gene.pool.ref<-gene.transcript.ref[unlist(lapply(gene.pool[order.output],function(i){index.i<-which(gene.transcript.ref[,2]==i); return(index.i[order(gene.transcript.ref[index.i,1])])})),2:1]
	estimates.matrix<-matrix(unlist(lapply(theta.est.list.all,function(i){
									temp.list<-lapply(1:length(i),function(j){if (any(is.na(i[[j]]))) {return(rep(NA,n.isoform[j]))} else {return(i[[j]])}})
									return(unlist(temp.list[order.output]))})),ncol=length(theta.est.list.all))
	estimates.result<-data.frame(gene.pool.ref,estimates.matrix,stringsAsFactors=FALSE)
	if (output.na){
		write.table(estimates.result,paste(output.dir,"/estimates.txt",sep=""),append=TRUE,sep="\t",quote=FALSE,row.names=FALSE,col.names=FALSE)
	}
	else{
		write.table(estimates.result[rowSums(is.na(estimates.result[,-(1:2),drop=FALSE]))!=(ncol(estimates.result)-2),,drop=FALSE],paste(output.dir,"/estimates.txt",sep=""),append=TRUE,sep="\t",quote=FALSE,row.names=FALSE,col.names=FALSE)
	}
	cat("Done!","\n")

	cat("Runing time:","\n")
	cat(proc.time()-ptm,"\n")
}
