#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <malloc.h>
#include "roc_pwm.h"

/* Release a 2-D char array whose storage is anchored at rows[0].
 * NOTE(review): this mirrors the free(x[0]); free(x); pattern this file
 * already applied to such arrays -- confirm the layout against
 * alloc_char_char()/read_seq() in roc_pwm.h. */
static void roc_free_char2d(char **rows) {
   if (rows) {
      free(rows[0]);
      free(rows);
   }
}

/* Release a 2-D double array whose storage is anchored at rows[0].
 * NOTE(review): same layout assumption as roc_free_char2d -- confirm
 * against alloc_double_double()/read_pwm(). */
static void roc_free_double2d(double **rows) {
   if (rows) {
      free(rows[0]);
      free(rows);
   }
}

/*
 * Entry point: writes ROC-curve points for a position weight matrix.
 *
 * Command line (argc must be 5):
 *    argv[1]  motif (positive) sequence file
 *    argv[2]  background (negative) sequence file
 *    argv[3]  pwm file
 *    argv[4]  output file
 *
 * Both sequence sets are read, reverse sequences are built for each, the pwm
 * is read and transformed, and every sequence set is scored. The score range
 * of the background set is then divided into NUM_CUTOFFS equal steps; for
 * each cutoff one tab-separated line is written to the output file:
 *    cutoff, fraction of motif seqs >= cutoff, fraction of background seqs >= cutoff
 *
 * Returns EXIT_SUCCESS when the output was written, EXIT_FAILURE on any
 * detected error (empty input, failed allocation, unwritable output).
 * Printing the usage text still terminates with status 0, as before.
 */
int main(int argc,char **argv) {

   enum { NUM_CUTOFFS = 5000 };   /* number of steps across the score range */

   int numSeq1=0,numSeq0=0,pwmLen=0;
   int numM,numB;
   int status=EXIT_FAILURE;
   int *seqLen1=NULL,*seqLen0=NULL;
   char **seq1=NULL,**rseq1=NULL,**rseq0=NULL,**seq0=NULL,**geneID=NULL;
   double **pwm=NULL,**tpwm=NULL,*score0=NULL,*score1=NULL;
   double sumMin,sumMax,cutoff,aveLen,increment;
   int ii,i;
   FILE *fq=NULL;

   printf("\nThis program computes the ROC curve for a given pwn\n\n");
   if (argc!=5) {
      printf("USAGE: rocPWM motifSeqFile BackgSeqFile pwmFile output\n\n");
      printf("  motifSeqFile:  file containing the motif sequences (true positive)\n");
      printf("  backgSeqFile:  file containing the background sequences (false positive)\n");
      printf("  pwmFile:       file containing the pwm\n");
      printf("  output:        file for the output\n\n");
      exit(0);
   }

   seqLen0=alloc_int(MAX_NUM_SEQ);
   seqLen1=alloc_int(MAX_NUM_SEQ);
   geneID=alloc_char_char(MAX_NUM_SEQ,500);
   if (!seqLen0 || !seqLen1 || !geneID) {
      fprintf(stderr,"Error: memory allocation failed\n");
      goto cleanup;
   }

   /* Motif (positive) set. geneID is filled here and overwritten by the
    * second read_seq() call below; it is not used afterwards. */
   seq1=read_seq(&numSeq1,seqLen1,geneID,MAX_NUM_SEQ,MAX_SEQ_LENGTH,argv[1]);
   if (!seq1 || numSeq1<=0) {
      /* Without this guard the average and the rates below divide by zero. */
      fprintf(stderr,"Error: no sequences read from %s\n",argv[1]);
      goto cleanup;
   }
   aveLen=0;
   for (i=0; i<numSeq1; i++) aveLen +=seqLen1[i];
   aveLen /=(double)numSeq1;
   printf("average seq length: %5.1f\n",aveLen);
   rseq1=alloc_char_char(MAX_NUM_SEQ,MAX_SEQ_LENGTH+1);
   if (!rseq1) {
      fprintf(stderr,"Error: memory allocation failed\n");
      goto cleanup;
   }
   reverse_seq(seq1,rseq1,numSeq1,seqLen1);

   /* Background (negative) set. */
   seq0=read_seq(&numSeq0,seqLen0,geneID,MAX_NUM_SEQ,MAX_SEQ_LENGTH,argv[2]);
   if (!seq0 || numSeq0<=0) {
      /* Also protects the score0[numSeq0-1] access further down. */
      fprintf(stderr,"Error: no sequences read from %s\n",argv[2]);
      goto cleanup;
   }
   aveLen=0;
   for (i=0; i<numSeq0; i++) aveLen +=seqLen0[i];
   aveLen /=(double)numSeq0;
   printf("average seq length: %5.1f\n",aveLen);
   rseq0=alloc_char_char(MAX_NUM_SEQ,MAX_SEQ_LENGTH+1);
   if (!rseq0) {
      fprintf(stderr,"Error: memory allocation failed\n");
      goto cleanup;
   }
   reverse_seq(seq0,rseq0,numSeq0,seqLen0);

   score0=alloc_double(numSeq0);
   score1=alloc_double(numSeq1);
   if (!score0 || !score1) {
      fprintf(stderr,"Error: memory allocation failed\n");
      goto cleanup;
   }

   pwm=read_pwm(&pwmLen,argv[3]);
   /* tpwm is sized for MAX_PWM_LENGTH rows, so a longer pwm must be rejected
    * before transform_pwm() is handed pwmLen. */
   if (!pwm || pwmLen<=0 || pwmLen>MAX_PWM_LENGTH) {
      fprintf(stderr,"Error: could not read a usable pwm from %s\n",argv[3]);
      goto cleanup;
   }
   tpwm=alloc_double_double(MAX_PWM_LENGTH,4);
   if (!tpwm) {
      fprintf(stderr,"Error: memory allocation failed\n");
      goto cleanup;
   }
   transform_pwm(pwm,tpwm,pwmLen);
   sum_position_min(&(sumMin),tpwm,pwmLen);
   sum_position_max(&(sumMax),tpwm,pwmLen);

   score_seq(numSeq0,seq0,rseq0,seqLen0,tpwm,pwmLen,sumMin,sumMax,score0);
   score_seq(numSeq1,seq1,rseq1,seqLen1,tpwm,pwmLen,sumMin,sumMax,score1);

   fq=fopen(argv[4],"w");
   if (!fq) {
      perror(argv[4]);
      goto cleanup;
   }

   /* Walk the cutoff from score0[0] down to score0[numSeq0-1].
    * NOTE(review): the early `break` in the counting loops is only correct if
    * score_seq() leaves each score array sorted in descending order --
    * confirm against its implementation. */
   increment=(score0[0]-score0[numSeq0-1])/(double)NUM_CUTOFFS;
   for (ii=0; ii<=NUM_CUTOFFS; ii++) {
      cutoff=score0[0]-ii*increment;
      numM=0;
      for (i=0; i<numSeq1; i++) {
         if (score1[i]>=cutoff) numM++;
         else break;
      }
      numB=0;
      for (i=0; i<numSeq0; i++) {
         if (score0[i]>=cutoff) numB++;
         else break;
      }
      fprintf(fq,"%5.4f\t%5.4f\t%5.4f\n",cutoff,(double)numM/(double)numSeq1,(double)numB/(double)numSeq0);
   }

   if (ferror(fq)) {
      fprintf(stderr,"Error: failed while writing %s\n",argv[4]);
      fclose(fq);
      goto cleanup;
   }
   /* fclose() flushes buffered output, so its result decides success. */
   if (fclose(fq)!=0) {
      perror(argv[4]);
      goto cleanup;
   }
   status=EXIT_SUCCESS;

cleanup:
   /* free(NULL) is a no-op, so unreached allocations need no guards. */
   free(seqLen1);
   free(seqLen0);
   /* NOTE(review): assumes alloc_double() returns plain heap memory, as
    * alloc_int() evidently does (its result was already passed to free()). */
   free(score1);
   free(score0);
   roc_free_char2d(seq0);
   roc_free_char2d(seq1);
   roc_free_char2d(rseq0);
   roc_free_char2d(rseq1);
   roc_free_char2d(geneID);
   roc_free_double2d(pwm);
   roc_free_double2d(tpwm);

   return status;
}
