#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <math.h>
#include "fdr_pwm.h"
#include "fdr_defines.h"

/*-------------------------------------------------------------------------------
|  One candidate cutoff recorded by false_discovery() while it tightens the
|  score threshold: the site counts and the FDR computed at that cutoff.
-------------------------------------------------------------------------------*/
typedef struct local_minimum {
   int    numSite1;   // number of input-sequence subseqs selected at this cutoff
   double numSite0;   // background subseqs selected, averaged over the background sets
   double FDR;        // false discovery rate computed for this cutoff
} LocalMin;

// sort 'size' LocalMin records in place (qsort with Compare_local as comparator)
void sort_local(LocalMin *s2,int size);
// qsort comparator for LocalMin records: larger numSite1 sorts first
int Compare_local(const void *s1,const void *s2);

/*------------------------------------------------------------------------------------------
|  false_discovery
|
|  Chooses a score cutoff so that the estimated false discovery rate (FDR) of the selected
|  input subseqs does not exceed currentSpecifiedBound. The FDR estimate used throughout is
|
|     (numTopSubseq1 * average background hits) / (average background subseqs * input hits)
|
|  Starting from the lowest input score, the cutoff is raised one distinct score at a time
|  until the estimate drops to the bound or no input subseq is left.
|
|  topSubseq1, numTopSubseq1   top-ranked subseqs from the input sequences
|  topSubseq0, numTopSubseq0   top-ranked subseqs from the background sequences
|  numSeq1                     not used by this routine
|  numBackgSets                number of background sets the background counts are averaged over
|  currentSpecifiedBound       target FDR
|  numSite1  (out)             number of input subseqs selected
|  numSite0  (out)             average number of background subseqs selected
|
|  Returns the FDR of the chosen cutoff (capped at 1), or 1 when the FDR cannot be
|  controlled. Invalid counts (numTopSubseq1<=0, numTopSubseq0<0, numBackgSets<=0) and
|  allocation failure are reported the same way: 0 sites and a return value of 1.
|
|  NOTE(review): the counting loops stop at the first score below the cutoff, so both input
|  arrays are presumably sorted by decreasing value and sort_segments() presumably sorts
|  in the same direction -- confirm against the callers.
|  NOTE(review): with numTopSubseq0==0 the initial estimate is 0/0 -- confirm callers never
|  pass an empty background list.
------------------------------------------------------------------------------------------*/
double false_discovery(Subseq *topSubseq1,int numSeq1,Subseq *topSubseq0,int numBackgSets,
   double currentSpecifiedBound,int *numSite1,double *numSite0,int numTopSubseq1,int numTopSubseq0){

   enum { MAX_LOCAL_MIN=200 };   // capacity of the list of near-bound candidates

   int i,ii,j;
   int numSelect1,numSelect0,numCombined,numTotal,fdrControl;
   double aveNumSelected0,aveNumBackgSubSeq,actualFDR,cutoff1;
   int cn;
   Segment *combinedTopSubSeq;
   LocalMin *tmp;

   (void)numSeq1;

   // an empty input list would read topSubseq1[-1]; a negative background count would
   // under-size the combined buffer; a non-positive set count cannot be averaged over
   if (numTopSubseq1<=0 || numTopSubseq0<0 || numBackgSets<=0) {
      *numSite1=0; *numSite0=0;
      return (1);
   }

   numTotal=numTopSubseq1+numTopSubseq0;
   combinedTopSubSeq=(Segment *) calloc((size_t)numTotal,sizeof(Segment));
   tmp=(LocalMin *)calloc(MAX_LOCAL_MIN,sizeof(LocalMin));
   if (!combinedTopSubSeq || !tmp) {
      free(combinedTopSubSeq);
      free(tmp);
      *numSite1=0; *numSite0=0;
      return (1);
   }

   // combine top-ranked subseqs from input (index 0) and backg (index 1) sequences
   for(ii=0; ii<numTopSubseq1; ii++){
       combinedTopSubSeq[ii].value=topSubseq1[ii].value;
       combinedTopSubSeq[ii].index=0;
   }
   for(ii=numTopSubseq1,j=0; ii<numTotal; ii++,j++){
       combinedTopSubSeq[ii].value=topSubseq0[j].value;
       combinedTopSubSeq[ii].index=1;
   }
   // sort subseq by score
   sort_segments(combinedTopSubSeq,numTotal);

   // start by treating all top-ranked subseqs in input seq as binding sites
   cutoff1=topSubseq1[numTopSubseq1-1].value;

   // count number in input sequences that meet the criterion
   numSelect1=0;
   for(i=0; i<numTopSubseq1; i++){
      if(topSubseq1[i].value>=cutoff1) numSelect1++;
      else break;
   }
   // count number in backg sequences that meet the criterion
   numSelect0=0;
   for(i=0; i<numTopSubseq0; i++){
      if(topSubseq0[i].value>=cutoff1) numSelect0++;
      else break;
   }

   aveNumSelected0=(double)numSelect0/(double)numBackgSets;
   numCombined=numSelect1+numSelect0;

   aveNumBackgSubSeq=(double)numTopSubseq0/(double)numBackgSets;
   actualFDR=(double)(numTopSubseq1*aveNumSelected0)/(double)(aveNumBackgSubSeq*numSelect1);

   fdrControl=1; cn=0;
   while (actualFDR-currentSpecifiedBound>0) {
      // drop every entry tied at the current cutoff, then move the cutoff up to the
      // next distinct score in the combined list
      for(j=numCombined-1; j>=0; j--){
         if(combinedTopSubSeq[j].value==cutoff1){
            if(combinedTopSubSeq[j].index==0) numSelect1--;
            else numSelect0--;
         }
         else { numCombined=j+1; cutoff1=combinedTopSubSeq[j].value; break;}
      }
      aveNumSelected0=(double)numSelect0/numBackgSets;
      if (numSelect1>0) {
         actualFDR=(double)(numTopSubseq1*aveNumSelected0)/(double)(aveNumBackgSubSeq*numSelect1);

         /*------------------------------------------------------------------------------------------
         |  for similar computed FDRs, e.g., 0.200 vs 0.205, the number of binding sites selected can
         |  vary significantly, e.g., 100 to 120. Since the computed FDR takes only finitely many
         |  values, it can miss the specified FDR and select none or few binding sites. To alleviate
         |  this, every cutoff whose FDR lies within 5% of the specified bound is remembered (up to
         |  MAX_LOCAL_MIN of them) so the one selecting the most binding sites can be used.
         -------------------------------------------------------------------------------------------*/
         if (fabs(actualFDR-currentSpecifiedBound)<=currentSpecifiedBound*0.05 && cn<MAX_LOCAL_MIN) {
            tmp[cn].numSite1=numSelect1; tmp[cn].numSite0=aveNumSelected0; tmp[cn].FDR=actualFDR; cn++;
         }
      }
      else { fdrControl=0; break; }   // no input subseq left: the bound cannot be met
   }

   if (cn>0) {
      // most input sites first, fewest last
      sort_local(tmp,cn);

      /*-------------------------------------------------------------------------------
      |  among the remembered near-bound cutoffs, if the highest number of binding
      |  sites exceeds the lowest by more than 5%, take the highest number together
      |  with its (possibly slightly larger) FDR; otherwise keep the cutoff at which
      |  the search stopped.
      -------------------------------------------------------------------------------*/
      if ((double) tmp[0].numSite1-tmp[cn-1].numSite1 > 0.05*tmp[cn-1].numSite1) {
         *numSite1=tmp[0].numSite1;
         *numSite0=tmp[0].numSite0;
         actualFDR=tmp[0].FDR;
      }
      else {
         *numSite1=numSelect1; *numSite0=aveNumSelected0;
      }
   }
   else {
      *numSite1=numSelect1; *numSite0=aveNumSelected0;
   }

   // an estimate above 1 is capped; both counts become the smaller of the two
   if (actualFDR>1) {
      actualFDR=1;
      *numSite1=(int) min((double) numSelect1, aveNumSelected0);
      *numSite0=min((double) numSelect1, aveNumSelected0);
   }

   free(combinedTopSubSeq);
   free(tmp);

   if (fdrControl) return (actualFDR);
   else            return (1);
}

/*-------------------------------------------------------------------------------
|  Sorts the first 'size' LocalMin records of s2 in place with qsort(), using
|  Compare_local as the ordering (larger numSite1 first).
-------------------------------------------------------------------------------*/
void sort_local(LocalMin *s2,int size) {

   qsort(s2,(size_t)size,sizeof *s2,Compare_local);
}

int Compare_local(const void *s1,const void *s2) {

   if (((LocalMin *)s1)->numSite1 < ((LocalMin *)s2)->numSite1) { return  1; }
   if (((LocalMin *)s1)->numSite1 > ((LocalMin *)s2)->numSite1) { return -1; }
      return 0;
}

