#!/usr/bin/perl -w

#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Create distribution plots from the histogram data output from OMiMa
# The program requires the following two free programs installed
# 1) perl (http://www.perl.org/): default path /usr/bin/perl
# 2) gnuplot (http://www.gnuplot.info/): default path /usr/bin/gnuplot 
  
# Copyright 2005 Weichun Huang. All rights reserved.
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

use strict;
use FileHandle;

# Path of the gnuplot executable; change this if gnuplot is installed in
# some other directory.
my $GNUPLOT="/usr/bin/gnuplot"; 

# Make sure gnuplot is installed before doing any work.  The path must be a
# regular file that is executable (-x alone is also true for directories).
# The message goes to STDERR and the exit status is non-zero so that calling
# scripts can detect the failure.
unless (-f $GNUPLOT and -x _){
	print STDERR "No GNUPLOT program found! Please install GNUPLOT first.\n";
	exit 1;
}

# Command-line arguments:
#   inDir    -- directory holding the histogram data files
#   fileExt  -- a file name, or the suffix shared by several file names
#   fmt      -- output graph format (also used as the output file extension)
#   xlab     -- x-axis label
#   ylab     -- y-axis label
#   numOfBin -- optional number of histogram bins
my ($inDir, $fileExt, $fmt, $xlab, $ylab, $numOfBin);
if(@ARGV==5 or @ARGV==6){ 
	($inDir, $fileExt, $fmt, $xlab, $ylab, $numOfBin)=@ARGV;
	# default number of bins of histogram
	$numOfBin=20 unless defined $numOfBin;
	# The bin count is used as a divisor when the bin width is computed, so
	# reject zero, negative and non-numeric values up front.
	if($numOfBin!~/\A[1-9]\d*\z/){
		print STDERR "numOfBin must be a positive integer, got '$numOfBin'\n";
		exit 1;
	}
}
else { 
	print "Usage:$0 inFolder InFiles graphFormt xLabel YLabel [numOfBin]\n";
	print <<"EXAMPLE";

	graphFormt -- commonly used graph formts are eps, png, jpg  

	Example 1: single file mode
		
		To plot location histgram of TfbsOne that :
		1)  Input file is TfbsOne.loc
		2)  Input file is in directory ./file_path  
		3)  Output histogram is EPS figure  
		4)  x-axis label is "Location" and Y-axis is "Frequency"
		
		The plot command will be:		
		$0 ./file_path TfbsOne.loc eps Location Frequency

	Example 2: Multiple files mode

		To plot histgram of several files whose filenames end with same suffix, e.g. *.loc, 
		and output histgram is PNG figure
		The plot command will be:		
		$0  ./file_path loc png Location Frequency
EXAMPLE

	# A wrong number of arguments is an error: report it through the exit status.
	exit 1;
}
# Collect the regular files in $inDir whose names end with $fileExt.
# $fileExt is a literal file name or suffix, so it is quoted with \Q..\E;
# otherwise "." would match any character and input such as "*.loc" would be
# an invalid pattern.  The list is sorted so files are processed in a
# reproducible order.
opendir(my $dirHandle, $inDir) or die "can't opendir $inDir: $!";
my @dataFile = grep { /\Q$fileExt\E\z/ && -f "$inDir/$_" } readdir($dirHandle);
closedir($dirHandle);
@dataFile = sort @dataFile;

# ---------------------------------------------------------------------------
# Plot one histogram per data file.
#
# Each data line is expected to hold an x value followed by its frequency,
# separated by whitespace.  When a file has at least $numOfBin distinct x
# values, the data are re-binned into $numOfBin equal-width bins and written
# to a temporary file; otherwise the original file is plotted directly.
# ---------------------------------------------------------------------------

my $title="OMiMa Histogram Plot";

# gnuplot terminal to use for formats whose terminal name (or options)
# differs from the file extension; any other format is passed through as is.
my %termFor=(
	eps => "postscript eps color solid \"Helvetica\"",
	jpg => "jpeg",
);

# Plain decimal or exponent-notation number.
my $number=qr/\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/;

foreach my $fl (@dataFile){
	my $dataPath="$inDir/$fl";
	my $tempPath="$inDir/temp.temp";

	# Read the "x frequency" pairs.  Lines without digits, lines whose first
	# two fields are not numbers, and zero-frequency entries are ignored.
	my %freq;
	open(my $dataHandle, '<', $dataPath) or die "can't open $dataPath: $!";
	while (my $buff=<$dataHandle>){
		chomp $buff;
		next if($buff!~/\d/);
		# split ' ' skips leading whitespace and treats runs of blanks as
		# a single separator
		my ($x, $y)=split ' ', $buff;
		next unless(defined $y and $x=~$number and $y=~$number);
		next if($y==0);
		$freq{$x}=$y;
	}
	close($dataHandle);

	my @xvalue=sort {$a<=>$b} (keys %freq);

	# Per-file bin count: a file with few distinct x values must not reduce
	# the number of bins used for the files processed after it.
	my $nBins=(@xvalue<$numOfBin) ? scalar(@xvalue) : $numOfBin;

	# Fewer than two bins (this includes a file without usable data):
	# nothing to plot.
	next if($nBins<2);

	my ($min, $max)=@xvalue[0, -1];
	my $binSize=($max-$min)/$nBins;
	# Guards against keys that differ as text but are numerically equal.
	next if($binSize<=0);

	my $inFile=$dataPath;
	my $wroteTemp=0;
	if(@xvalue>=$numOfBin){
		# Sum the frequencies of every x value falling into each bin.  The
		# bin index is computed directly from x so that values lying on a
		# bin boundary are counted; the maximum goes into the last bin.
		my %binned;
		foreach my $x (@xvalue){
			my $idx=int(($x-$min)/$binSize);
			$idx=$nBins-1 if($idx>=$nBins);
			$binned{$idx}+=$freq{$x};
		}

		# Write "bin centre <TAB> summed frequency"; empty bins are omitted.
		open(my $tempHandle, '>', $tempPath) or die "can't write $tempPath: $!";
		foreach my $idx (sort {$a<=>$b} keys %binned){
			printf {$tempHandle} "%f\t%d\n", $min+($idx+0.5)*$binSize, $binned{$idx};
		}
		close($tempHandle) or die "can't close $tempPath: $!";
		$inFile=$tempPath;
		$wroteTemp=1;
	}

	# Output name: input name without its extension ("-loc" is appended when
	# the removed extension ends in "loc") plus the graph format.
	my $outFile=$fl;
	my $ext=($outFile=~s/\.(\w+)$//) ? $1 : "";
	$outFile.="-loc" if($ext=~/loc$/);
	$outFile="$inDir/$outFile" . ".$fmt";

	my $FMT=exists $termFor{$fmt} ? $termFor{$fmt} : $fmt;

	# Leave a small gap between neighbouring boxes.
	my $boxWidth=$binSize*.90;

	# gnuplot keeps the working directory of this script (no "cd"), so the
	# $inDir-prefixed paths below resolve the same way they do in Perl, even
	# when $inDir is a relative path.
	open(my $plot, '|-', $GNUPLOT) or die "no gnuplot";
	$plot->autoflush(1);
	print {$plot} <<"GNUPLOT_COMMANDS";
	set term $FMT
	set size 1, 1
	set grid noxtics ytics
	set pointsize 3
	set bar 3
	set origin 0,0
	set autoscale
	set key off
	set rmargin 5
	set label '$title' at screen 0.99, screen .99 right rotate by 90 textcolor lt 1
	set boxwidth $boxWidth
	set style fill solid 0.9
	set output "$outFile"
	set xlabel "$xlab"
	set ylabel "$ylab"
	set noxzeroaxis
	set noyzeroaxis
	set border
	plot "$inFile" using 1:2 with boxes lt 3
GNUPLOT_COMMANDS
	# close() waits for gnuplot and is false when it exits with an error.
	close($plot) or warn "gnuplot failed while plotting $dataPath\n";

	# Remove the temporary file only if this iteration created it.
	unlink($tempPath) if($wroteTemp);
}

