#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <malloc.h>
#include "fdr_pwm.h"
#include "fdr_defines.h"

/*
 * Tally how many of the selected sites fall in each sequence.
 *
 *   topSubSeq      selected sites; only the seqID field of each entry is read
 *   numSite        number of entries of topSubSeq to scan
 *   numSeq         number of counters in numSitePerSeq that are reset to zero
 *   numSitePerSeq  output: entry s ends up holding the number of scanned
 *                  sites whose seqID is s
 *
 * seqID is used directly as an index into numSitePerSeq; no range check is
 * performed here.
 */
void count_num_sites_per_seq(Subseq *topSubSeq,int numSite,int numSeq, int *numSitePerSeq) {

   int seq;
   int site;

   /* start every sequence at zero ... */
   seq = 0;
   while (seq < numSeq) {
      numSitePerSeq[seq] = 0;
      seq++;
   }

   /* ... then credit each site to the sequence it was found in */
   for (site = 0; site < numSite; site++) {
      numSitePerSeq[topSubSeq[site].seqID] += 1;
   }
}

/*
 * Return the decimal order of magnitude of a score below 1: the smallest
 * a >= 1 for which score * 10^a >= 1, i.e. 10^-a <= score < 10^-(a-1).
 * Examples: 0.5 -> 1, 0.05 -> 2, 0.005 -> 3.
 *
 *   score   value to classify; must be < 1.0
 *
 * Returns a in 1..19 when such an exponent exists. If the score is still
 * below 1 after scaling by 10^19 (this includes zero and negative values),
 * a warning is printed and 20 is returned.
 *
 * A score >= 1.0 is a caller error: a message is printed and the process
 * terminates with a failure status.
 */
int range(double score) {

  enum { MAX_EXPONENT = 20 };
  double scaled;
  int exponent;

  if (score >= 1.0) {
     printf("wrong input\n");
     exit(EXIT_FAILURE);
  }

  /* Scale by exactly one decade per step so that, after `exponent` steps,
     scaled == score * 10^exponent.  (Multiplying by 10^exponent on every
     step would compound the factors and overshoot the true magnitude.) */
  scaled = score;
  for (exponent = 1; exponent < MAX_EXPONENT; exponent++) {
     scaled *= 10.0;
     if (scaled >= 1.0) {
        return exponent;
     }
  }

  printf("score is smaller than 10e-20\n");
  return MAX_EXPONENT;
}

/*
 * normalize_per_seq: convert log-scale scores to linear scale and normalise
 * them sequence by sequence, in place.
 *
 *   score, rscore   score[i][j] / rscore[i][j] hold the two log-scale scores
 *                   of position j in sequence i (presumably forward and
 *                   reverse strand - confirm against the caller); both are
 *                   overwritten
 *   seqLen          seqLen[i] is the length of sequence i; positions
 *                   0 .. seqLen[i]-motifLen are processed
 *   motifLen        motif width
 *   numSeq1         number of sequences to process
 *   numSitePerSeq   numSitePerSeq[i] is the number of sites assigned to
 *                   sequence i
 *
 * For every sequence, after exponentiating:
 *   - numSitePerSeq[i] == 0 : all scores of the sequence are set to 0;
 *   - numSitePerSeq[i]  > 0 : scores are scaled so that they sum to
 *     numSitePerSeq[i], values above 1 are clipped to 1 with the remaining
 *     mass redistributed ("squash", after MEME), and every motifLen-wide
 *     window whose total exceeds 1 is scaled down ("smooth", after MEME);
 *   - numSitePerSeq[i]  < 0 : the exponentiated scores are left as they are.
 */
void normalize_per_seq(double **score,double **rscore,int *seqLen,int motifLen,int numSeq1,int *numSitePerSeq){

   int seq, pos, off, win, k;
   double total, openMass, windowMass, scale;
   int clipped, remaining, magnitude;

   /* Step 1: leave log space. */
   for (seq = 0; seq < numSeq1; seq++) {
      const int nPos = seqLen[seq] - motifLen + 1;

      for (pos = 0; pos < nPos; pos++) {
         score[seq][pos]  = exp(score[seq][pos]);
         rscore[seq][pos] = exp(rscore[seq][pos]);
      }
   }

   /* Step 2: per-sequence normalisation. */
   for (seq = 0; seq < numSeq1; seq++) {
      const int nPos     = seqLen[seq] - motifLen + 1;
      const int expected = numSitePerSeq[seq];

      /* Sequences without any site contribute nothing. */
      if (expected == 0) {
         for (pos = 0; pos < nPos; pos++) {
            score[seq][pos]  = 0.0;
            rscore[seq][pos] = 0.0;
         }
         continue;
      }
      if (expected < 0) {
         continue;
      }

      /* Scale so that both score arrays together sum to `expected`.
         NOTE(review): total is not checked for zero before dividing. */
      total = 0;
      for (pos = 0; pos < nPos; pos++) {
         total += (score[seq][pos] + rscore[seq][pos]);
      }
      for (pos = 0; pos < nPos; pos++) {
         score[seq][pos]  = score[seq][pos]  * (double)expected / total;
         rscore[seq][pos] = rscore[seq][pos] * (double)expected / total;
      }

      /* Nudge exact 1.0 values above 1 so the squash below clips and counts
         them; inside the squash, "== 1" marks an already-clipped entry. */
      for (pos = 0; pos < nPos; pos++) {
         if (score[seq][pos] == 1.0) {
            score[seq][pos] = 1.0001;
         }
         if (rscore[seq][pos] == 1.0) {
            rscore[seq][pos] = 1.0001;
         }
      }

      /* Squash (after MEME): clip entries above 1 to exactly 1, then rescale
         the entries that are not 1 so they sum to the sites still
         unaccounted for.  Repeats until a pass clips nothing. */
      remaining = expected;
      for (;;) {
         clipped  = 0;
         openMass = 0;

         for (pos = 0; pos < nPos; pos++) {
            if (score[seq][pos] > 1.0) {
               score[seq][pos] = 1.0;
               remaining--;
               clipped = 1;
            }
            if (rscore[seq][pos] > 1.0) {
               rscore[seq][pos] = 1.0;
               remaining--;
               clipped = 1;
            }
            if (score[seq][pos] < 1.0) {
               openMass += score[seq][pos];
            }
            if (rscore[seq][pos] < 1.0) {
               openMass += rscore[seq][pos];
            }
         }

         if (!clipped) {
            break;
         }

         if (openMass < 10e-10 && remaining > 0) {
            /* Tiny denominator: lift numerator and denominator by the same
               power of ten before dividing, to avoid dividing a small
               number by a small number. */
            magnitude = range(openMass);
            scale = pow(10, magnitude);
            for (pos = 0; pos < nPos; pos++) {
               if (score[seq][pos] != 1.0) {
                  score[seq][pos] = (score[seq][pos] * remaining * scale) / (openMass * scale);
               }
               if (rscore[seq][pos] != 1.0) {
                  rscore[seq][pos] = (rscore[seq][pos] * remaining * scale) / (openMass * scale);
               }
            }
         }
         else {
            for (pos = 0; pos < nPos; pos++) {
               if (score[seq][pos] != 1.0) {
                  score[seq][pos] = score[seq][pos] * (double)remaining / openMass;
               }
               if (rscore[seq][pos] != 1.0) {
                  rscore[seq][pos] = rscore[seq][pos] * (double)remaining / openMass;
               }
            }
         }
      }

      /* Smooth (after MEME): for every offset, tile the sequence with
         non-overlapping motifLen-wide windows and scale down any window
         whose combined mass exceeds 1.
         NOTE(review): the last window can reach index seqLen[seq]-1, which
         is past the seqLen[seq]-motifLen+1 positions handled above - confirm
         the arrays are allocated and initialised that far. */
      for (off = 0; off < motifLen; off++) {
         const int nWin = (seqLen[seq] - off) / motifLen;

         for (win = 0; win < nWin; win++) {
            const int start = win * motifLen + off;

            windowMass = 0.0;
            for (k = 0; k < motifLen; k++) {
               windowMass += (score[seq][start + k] + rscore[seq][start + k]);
            }

            if (windowMass > 1.0) {
               for (k = 0; k < motifLen; k++) {
                  score[seq][start + k]  /= windowMass;
                  rscore[seq][start + k] /= windowMass;
               }
            }
         }
      }
   }
}

