#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "fdr_pwm.h"
#include "fdr_defines.h"

/*
 * Select at most Cmax mutually non-overlapping entries from allSubSeq.
 *
 * allSubSeq is scanned in array order and an entry is kept only if it does
 * not overlap any entry already kept, so earlier entries take priority
 * (presumably the caller has sorted the array best-score-first -- confirm
 * against sort_segments). Two entries overlap when their start indices
 * differ by less than motifLen; the strand is not considered.
 *
 * topSubSeq    output array; must have room for Cmax entries
 * allSubSeq    candidate entries (value, index, strand are read)
 * totalSubSeq  number of entries in allSubSeq
 * Cmax         maximum number of entries to select
 * motifLen     window width used for the overlap test
 *
 * Returns the number of entries written to topSubSeq. This can be smaller
 * than Cmax when too few non-overlapping candidates exist, and is 0 (with
 * nothing written) when Cmax<=0 or totalSubSeq<=0.
 */
int select_top_Cmax_subsequences(Segment *topSubSeq,Segment *allSubSeq,int totalSubSeq,
   int Cmax,int motifLen){

   int cn,j,numSelected;

   // no candidates or no room in the output: never touch topSubSeq[0]
   if(Cmax<=0 || totalSubSeq<=0) return (0);

   numSelected=0;
   for(cn=0; cn<totalSubSeq && numSelected<Cmax; cn++){
      int overlap=0;

      for(j=0; j<numSelected; j++){
         // windows [a,a+w-1] and [t,t+w-1] intersect iff |a-t| < w
         int gap=allSubSeq[cn].index-topSubSeq[j].index;
         if(gap<0) gap=-gap;
         if(gap<motifLen){ overlap=1; break; }
      }
      if(overlap) continue;

      // the first candidate is always accepted (nothing selected yet)
      topSubSeq[numSelected].value=allSubSeq[cn].value;
      topSubSeq[numSelected].index=allSubSeq[cn].index;
      topSubSeq[numSelected].strand=allSubSeq[cn].strand;
      numSelected++;
   }
   return (numSelected);
}

/*
 * Collect, over the whole dataset, up to Cmax non-overlapping subsequences
 * per sequence and record where each one came from.
 *
 * For every sequence i longer than motifLen, all seqLen[i]-motifLen+1 start
 * positions are listed twice -- once with score[i][k] (strand 1) and once
 * with rScore[i][k] (strand -1) -- then ordered by sort_segments() and
 * filtered by select_top_Cmax_subsequences().
 *
 * allTopSubSeq  output array; receives value, seqID, posID and strand of each
 *               selected subsequence (assumed to hold at least numSeq*Cmax
 *               entries -- TODO confirm with caller)
 * score         per-sequence, per-position scores used for strand 1
 * rScore        per-sequence, per-position scores used for strand -1
 * numSeq        number of sequences
 * seqLen        length of each sequence (assumed <= maxSeqLen)
 * motifLen      motif width
 * Cmax          maximum number of subsequences kept per sequence
 * maxSeqLen     longest sequence length; sizes the scratch buffer
 *
 * Returns the total number of entries written to allTopSubSeq. Returns 0
 * when there is nothing to select (numSeq<=0, Cmax<=0, or no sequence can be
 * longer than the motif) and also when the scratch buffers cannot be
 * allocated.
 */
int select_top_subseqs(Subseq *allTopSubSeq,double **score,double **rScore,int numSeq,
   int *seqLen,int motifLen,int Cmax,int maxSeqLen){

   int i,k;
   Segment *allSubseq; // all 2(L-w+1) subsequences of one seq
   Segment *topSubSeq; // top Cmax of them
   int cn,numWindows,numSelectPerSeq,totalTopSubseq;

   // nothing can be selected; also keeps the allocation sizes below positive
   if(numSeq<=0 || Cmax<=0 || maxSeqLen<=motifLen) return (0);

   allSubseq = calloc(2*(size_t)(maxSeqLen-motifLen+1),sizeof *allSubseq);
   topSubSeq = calloc((size_t)Cmax,sizeof *topSubSeq);
   if(!allSubseq || !topSubSeq){
      // out of memory: report "nothing selected" instead of dereferencing NULL
      free(allSubseq);
      free(topSubSeq);
      return (0);
   }

   totalTopSubseq=0;
   for(i=0; i<numSeq; i++){
      // sequences not longer than the motif contribute nothing
      // NOTE(review): seqLen[i]==motifLen has one valid window but has always
      // been skipped here -- confirm this is intentional
      if(seqLen[i]<=motifLen) continue;

      numWindows=seqLen[i]-motifLen+1;
      cn=0;
      for(k=0; k<numWindows; k++){
         allSubseq[cn].value=score[i][k];
         allSubseq[cn].index=k;
         allSubseq[cn].strand=1;
         cn++;
         allSubseq[cn].value=rScore[i][k];
         allSubseq[cn].index=k;
         allSubseq[cn].strand=-1;
         cn++;
      }

      // sort all segments in a seq and select at most top Cmax non-overlapping subsequences
      sort_segments(allSubseq,cn);
      numSelectPerSeq=select_top_Cmax_subsequences(topSubSeq,allSubseq,cn,Cmax,motifLen);

      // add top subsequences from one seq to all top subseqs
      for(k=0; k<numSelectPerSeq; k++){
         allTopSubSeq[totalTopSubseq].value=topSubSeq[k].value;
         allTopSubSeq[totalTopSubseq].seqID=i;
         allTopSubSeq[totalTopSubseq].posID=topSubSeq[k].index;
         allTopSubSeq[totalTopSubseq].strand=topSubSeq[k].strand;
         totalTopSubseq++;
      }
   }

   free(allSubseq);
   free(topSubSeq);

   return (totalTopSubseq);
}

