#!/bin/bash
# Create an Illumina read quality profile from multiple fastq or gzipped
# fastq files.
#Weichun Huang at whduke@gmail.com

# Directory containing this script; the helper Perl scripts are run from it.
# "$0" is quoted so that an install path containing whitespace still works.
pDIR=$(dirname -- "$0")

#######################################
# Print the help text.
# Arguments: none
# Outputs:   usage, parameters, examples and notes on stdout
#######################################
usage() {
  printf '%s\n' "This tool is to create an illumina read quality profile from multiple fastq or gzipped fastq files"
  printf '\n'
  printf '%s\n' "USAGE:"
  printf '\t%s\n' "./art_profiler_illumina out_profile_name input_fastq_dir [fastq_filename_extension (default: fq)]"
  printf '\n'
  printf '%s\n' "PARAMETERS:"
  printf '\t%s\n' "out_profile_name:  the name of read quality profile"
  printf '\t%s\n' "input_fastq_dir:   the directory of input fastq or zipped fastq files"
  printf '\t%s\n' "fastq_filename_extension: fastq or gzipped fastq filename extension (default: fq)"
  printf '\n'
  printf '%s\n' "EXAMPLES:"
  printf '\t%s\n' "1) create hiseq2k profiles from all *.fq.gz in the directory fastq_dat_dir"
  printf '\t\t%s\n' "./art_profiler_illumina hiseq2k fastq_dat_dir fq.gz"
  printf '\t%s\n' "2) create miseq2500 profiles from all *.fq in the directory fastq_dat_dir"
  printf '\t\t%s\n' "./art_profiler_illumina miseq250 fastq_dat_dir fq"
  printf '\t%s\n' "3) create hiseq1k profiles from all *.fq in the directory fastq_dat_dir"
  printf '\t\t%s\n' "./art_profiler_illumina hiseq1k fastq_dat_dir"
  printf '\n'
  printf '%s\n' "NOTES: For paired-end fastq files, e.g., *.fq or *.fq.gz,"
  printf '%s\n' "       the filenames of the 1st reads must be *_1.fq or *_1.fq.gz,"
  printf '%s\n' "       and those of the 2nd reads must be *_2.fq or *_2.fq.gz,"
  printf '\n'
  printf '%s\n' "CONTACT: Weichun Huang at whduke@gmail.com"
}

# Positional parameters:
#   $1 - output profile name (prefix of the files that get written)
#   $2 - directory that holds the input fastq files
#   $3 - optional filename extension; "fq" when omitted
# Any other argument count prints the help text and stops.
case $# in
  2 | 3)
    outFile=$1
    iDIR=$2
    # ${3-fq} substitutes the default only when $3 is absent, so an explicitly
    # passed (even empty) third argument is taken as given.
    ext=${3-fq}
    ;;
  *)
    usage
    exit
    ;;
esac

#######################################
# Classify a fastq file by the read-number suffix of its name.
# Arguments: $1 - fastq path
#            $2 - filename extension without the leading dot (e.g. fq, fq.gz)
# Outputs:   "R1" for *_1.EXT or *.1.EXT, "R2" for *_2.EXT or *.2.EXT,
#            "SE" for every other name
#######################################
fastq_read_group() {
  case "$1" in
    *[_.]1."$2") printf '%s\n' R1 ;;
    *[_.]2."$2") printf '%s\n' R2 ;;
    *) printf '%s\n' SE ;;
  esac
}

# Space-separated lists of the per-file tables, and how many are in each:
#   oList / i   - files without a read-number suffix
#   of1st / k1  - first reads of paired-end data
#   of2nd / k2  - second reads of paired-end data
oList=
of1st=
of2nd=
i=0
k1=0
k2=0
for fq in "$iDIR"/*."$ext"; do
  # An unmatched glob is left as the literal pattern; skip it instead of
  # launching a job on a file that does not exist.
  [[ -e "$fq" ]] || continue

  # One background job per input file; they are waited for later in the script.
  "$pDIR"/fastqReadAvg.pl "$fq" &

  # The table is recorded as "<fastq file name>.txt" without the directory part
  # (presumably where fastqReadAvg.pl writes its output -- confirm in that script).
  tbl=${fq#"$iDIR"/}.txt

  # Both the "_N" and the ".N" suffix styles are recognised.
  case "$(fastq_read_group "$fq" "$ext")" in
    R1)
      of1st="$of1st $tbl"
      k1=$((k1 + 1))
      ;;
    R2)
      of2nd="$of2nd $tbl"
      k2=$((k2 + 1))
      ;;
    *)
      oList="$oList $tbl"
      i=$((i + 1))
      ;;
  esac
done

if [[ $((i + k1 + k2)) -eq 0 ]]; then
  printf 'No *.%s files found in %s\n' "$ext" "$iDIR" >&2
fi

# Block until every background job started by this script has finished.
# `wait` tracks this shell's own children, so it neither depends on a
# procps-specific `ps -C`, nor waits on unrelated processes that happen to share
# the name, nor wastes time in a fixed polling interval.
pid=$(jobs -p)
# Left unquoted on purpose: prints the job PIDs on one line, as before.
echo $pid
wait

#######################################
# Turn a group of per-file tables into one read profile.
# A single table is simply renamed; several are handed to summation.pl together
# with the name of the combined table. The combined table is then passed to
# combinedAvg.pl and empDist.pl, and all intermediate files are removed.
# Globals:   pDIR (read)    - directory of the helper Perl scripts
#            outFile (read) - output profile name prefix
# Arguments: $1   - tag appended to $outFile ("", "R1" or "R2")
#            $2   - number of tables in the group; nothing is done when < 1
#            $3.. - the table files of the group
# Outputs:   writes ${outFile}${tag}.txt and prints a confirmation line when
#            empDist.pl succeeds
#######################################
build_profile() {
  local tag=$1 count=$2
  shift 2
  local freq="${outFile}${tag}.freq.txt"
  local profile="${outFile}${tag}.txt"

  [[ $count -ge 1 ]] || return 0

  if [[ $count -eq 1 ]]; then
    mv -- "$@" "$freq"
  else
    "$pDIR"/summation.pl "$@" "$freq"
  fi
  "$pDIR"/combinedAvg.pl "$freq"
  if "$pDIR"/empDist.pl "$freq" "$profile"; then
    echo "The read profile file $profile has been created"
  fi
  # Remove this group's own tables and the combined table.
  rm -fr -- "$@" "$freq"
}

# The lists are space-separated strings built earlier in the script, so they are
# expanded unquoted on purpose to split them into one argument per file.
# shellcheck disable=SC2086
build_profile "" "$i" $oList
# shellcheck disable=SC2086
build_profile "R1" "$k1" $of1st
# shellcheck disable=SC2086
build_profile "R2" "$k2" $of2nd
