
/*----------------------------------------------------------------------------------------
   This subroutine generates background sequences. The background model is estimated
   from the input sequences, and the maximum order of the Markov model is determined
   from the total number of nucleotides in the input sequences.
----------------------------------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <malloc.h>
#include "fdr_pwm.h"
#include "alloc.h"
#include "random.h"

int select_order(int );
int count_base(int ,int *,char **);
int *count_nucleotides(char **,int ,int *,char **,int ,int );
int **generate_markov_chain0(double *,int ,int );
int **generate_markov_chain1(double *,double *,int ,int );
int **generate_markov_chain2(double *,double *,double *,int ,int );
int **generate_markov_chain3(double *,double *,double *,double *,int ,int );
int **generate_markov_chain4(double *,double *,double *,double *,double *,int ,int );
int **generate_markov_chain5(double *,double *,double *,double *,double *,double *,int ,int );
char **read_seq(int *,int *,char **,int ,int ,char *);
void compute_freq(int *,int ,double *);
void transition_1st(int *,double *);
void transition_2nd(int *,double *);
void transition_3rd(int *,double *);
void transition_4th(int *,double *);
void transition_5th(int *,double *);
void numerate_monomer_to_pentamer(char **,char **,char **,char **,char **,char **);

/*
 * background: estimate a Markov background model from the input sequences
 * and sample random background sequences from it.
 *
 *   numSeq, seq, seqLen  input sequences (lower-case a/c/g/t; other
 *                        characters are ignored by the monomer count)
 *   aveLen               length of every generated sequence
 *   numBackgSet          not used by this function
 *   numBackgSeq          number of sequences to generate
 *   backgfreq            output, 4 entries: strand-symmetrised base
 *                        frequencies (a and t share one value, c and g the other)
 *   markovOrder          requested order; lowered to the order the amount of
 *                        data supports (select_order) and raised to 0 if negative
 *
 * Returns a numBackgSeq x (aveLen+1) table of NUL-terminated strings obtained
 * from alloc_char_char(); ownership passes to the caller.
 */
char **background(int numSeq,char **seq,int *seqLen,int aveLen,int numBackgSet,int numBackgSeq, double *backgfreq,
   int markovOrder) {

   static const char *ordinal[6]={"0th","1st","2nd","3rd","4th","5th"};
   register int i,j;
   int **s1=NULL;
   int numBase,maxMarkovOrder;
   int *monomerCn;
   int *dimerCn=NULL,*trimerCn=NULL,*tetramerCn=NULL,*pentamerCn=NULL,*hexamerCn=NULL;
   char **bSeq;
   char **monomer,**dimer,**trimer,**tetramer,**pentamer,**hexamer;
   double *monomerFreq,*dimerFreq,*trimerFreq,*tetramerFreq,*pentamerFreq,*hexamerFreq;

   numBase=count_base(numSeq,seqLen,seq);
   maxMarkovOrder=select_order(numBase); 
   /* k-mer tables only exist up to hexamers, i.e. a 5th-order model */
   if (maxMarkovOrder>5) maxMarkovOrder=5;
   printf("Total number of nucleotides in input sequences: \t%d\n",numBase);
   printf("The data are sufficient to estimate a %1d-order Markov background model\n",maxMarkovOrder);
   printf("\nSpecified Markov order of the background model: %1d\n\n",markovOrder);

   if (markovOrder>maxMarkovOrder) {
      printf("\n\nNote: the data are only sufficient to estimate up to %1d-order Markov model\n",maxMarkovOrder); 
      printf("Use a %1d-order Markov background model instead\n\n",maxMarkovOrder);
      markovOrder=maxMarkovOrder;
   }
   if (markovOrder<0) {
      /* previously no generator ran for a negative order and s1 was read uninitialised */
      printf("\n\nNote: a negative Markov order is not valid\n");
      printf("Use a 0-order Markov background model instead\n\n");
      markovOrder=0;
   }

   monomer=alloc_char_char(4,2);
   dimer=alloc_char_char(16,3);
   trimer=alloc_char_char(64,4);
   tetramer=alloc_char_char(256,5);
   pentamer=alloc_char_char(1024,6);
   hexamer=alloc_char_char(4096,7);

   /* each table is allocated exactly once (monomerFreq used to be allocated twice) */
   monomerFreq=alloc_double(4);
   dimerFreq=alloc_double(16);
   trimerFreq=alloc_double(64);
   tetramerFreq=alloc_double(256);
   pentamerFreq=alloc_double(1024);
   hexamerFreq=alloc_double(4096);
   monomerCn=alloc_int(4);

   numerate_monomer_to_pentamer(monomer,dimer,trimer,tetramer,pentamer,hexamer);

   /* do not rely on alloc_int() returning zeroed memory */
   for (i=0; i<4; i++) monomerCn[i]=0;

   for (i=0; i<numSeq; i++) {
      for (j=0; j<seqLen[i]; j++) {
         switch (seq[i][j]) {
            case 'a': monomerCn[0]++; break; 
            case 'c': monomerCn[1]++; break; 
            case 'g': monomerCn[2]++; break; 
            case 't': monomerCn[3]++; break; 
            default: break; 
         } 
      } 
   }

   compute_freq(monomerCn,4,monomerFreq);

   /* average complementary bases so both strands share one composition */
   backgfreq[0]=backgfreq[3]=(monomerFreq[0]+monomerFreq[3])/2;
   backgfreq[1]=backgfreq[2]=(monomerFreq[1]+monomerFreq[2])/2;

   printf("generating %s-order Markov model\n",ordinal[markovOrder]);

   /* an order-k model needs the transition tables of every order 1..k */
   if (markovOrder>=1) {
      dimerCn=count_nucleotides(seq,numSeq,seqLen,dimer,16,2);
      transition_1st(dimerCn,dimerFreq);
   }
   if (markovOrder>=2) {
      trimerCn=count_nucleotides(seq,numSeq,seqLen,trimer,64,3);
      transition_2nd(trimerCn,trimerFreq);
   }
   if (markovOrder>=3) {
      tetramerCn=count_nucleotides(seq,numSeq,seqLen,tetramer,256,4);
      transition_3rd(tetramerCn,tetramerFreq);
   }
   if (markovOrder>=4) {
      pentamerCn=count_nucleotides(seq,numSeq,seqLen,pentamer,1024,5);
      transition_4th(pentamerCn,pentamerFreq);
   }
   if (markovOrder>=5) {
      hexamerCn=count_nucleotides(seq,numSeq,seqLen,hexamer,4096,6);
      transition_5th(hexamerCn,hexamerFreq);
      printf("done..transition\n");
   }

   switch (markovOrder) {
      case 0:
         s1=generate_markov_chain0(monomerFreq,numBackgSeq,aveLen);
         break;
      case 1:
         s1=generate_markov_chain1(monomerFreq,dimerFreq,numBackgSeq,aveLen);
         break;
      case 2:
         s1=generate_markov_chain2(monomerFreq,dimerFreq,trimerFreq,numBackgSeq,aveLen);
         break;
      case 3:
         s1=generate_markov_chain3(monomerFreq,dimerFreq,trimerFreq,tetramerFreq,numBackgSeq,aveLen);
         break;
      case 4:
         s1=generate_markov_chain4(monomerFreq,dimerFreq,trimerFreq,tetramerFreq,pentamerFreq,numBackgSeq,aveLen);
         break;
      default:
         s1=generate_markov_chain5(monomerFreq,dimerFreq,trimerFreq,tetramerFreq,pentamerFreq,hexamerFreq,numBackgSeq,aveLen);
         break;
   }

   /* count arrays of orders that were not used are still NULL; free(NULL) is a no-op */
   free(dimerCn);
   free(trimerCn);
   free(tetramerCn);
   free(pentamerCn);
   free(hexamerCn);
   free(monomerCn);

   /* two-level tables are released as row block [0] followed by the pointer array */
   if (monomer)  { free(monomer[0]);  free(monomer);  monomer=NULL;  }
   if (dimer)    { free(dimer[0]);    free(dimer);    dimer=NULL;    }
   if (trimer)   { free(trimer[0]);   free(trimer);   trimer=NULL;   }
   if (tetramer) { free(tetramer[0]); free(tetramer); tetramer=NULL; }
   if (pentamer) { free(pentamer[0]); free(pentamer); pentamer=NULL; }
   if (hexamer)  { free(hexamer[0]);  free(hexamer);  hexamer=NULL;  }

   free(monomerFreq);
   free(dimerFreq);
   free(trimerFreq);
   free(tetramerFreq);
   free(pentamerFreq);
   free(hexamerFreq);
 
   bSeq=alloc_char_char(numBackgSeq,aveLen+1);

   /* translate base codes 0..3 into characters */
   for (i=0; i<numBackgSeq; i++) {
      for (j=0; j<aveLen; j++) {
         switch (s1[i][j]) {
            case 0: bSeq[i][j]='a'; break;
            case 1: bSeq[i][j]='c'; break;
            case 2: bSeq[i][j]='g'; break;
            case 3: bSeq[i][j]='t'; break;
            default: break;
         }
      }
      bSeq[i][j]='\0';
   }
   if (s1) { free(s1[0]); free(s1); s1=NULL; }

   return (bSeq);
}

/*
 * Fill the six word tables with every word of length 1..6 over the alphabet
 * {a,c,g,t}, in lexicographic order (a < c < g < t), each NUL-terminated.
 * Table for length L must provide 4^L rows of at least L+1 characters.
 * Row r of a table spells r written in base 4, most significant digit first.
 */
void numerate_monomer_to_pentamer(char **monomer,char **dimer,char **trimer,char **tetramer,char **pentamer,char **hexamer) {

   static const char base[4]={'a','c','g','t'};
   char **table[6];
   int len,row,pos,code,numRow;

   table[0]=monomer;  table[1]=dimer;    table[2]=trimer;
   table[3]=tetramer; table[4]=pentamer; table[5]=hexamer;

   for (len=1; len<=6; len++) {
      char **words=table[len-1];

      numRow=1<<(2*len);   /* 4^len */
      for (row=0; row<numRow; row++) {
         /* peel base-4 digits off the row index, last letter first */
         code=row;
         for (pos=len-1; pos>=0; pos--) {
            words[row][pos]=base[code&3];
            code>>=2;
         }
         words[row][len]='\0';
      }
   }
}

/*
 * Count occurrences of each word in word[0..numWord-1] over all overlapping
 * windows of wordLen characters in the input sequences.
 *
 * A window is credited to the first table entry that matches it; windows that
 * match no entry (for example ones containing a character absent from the
 * table) are not counted.
 *
 * Returns an array of numWord counts obtained from alloc_int(); the caller
 * frees it.
 */
int *count_nucleotides(char **seq,int numSeq,int *seqLen,char **word,int numWord,int wordLen) {

   int *tally;
   char *window;
   int s,start,lastStart,c,w;

   window=alloc_char(wordLen+1);
   tally=alloc_int(numWord);

   w=0;
   while (w<numWord) tally[w++]=0;

   for (s=0; s<numSeq; s++) {
      lastStart=seqLen[s]-wordLen;   /* last position where a full window fits */

      for (start=0; start<=lastStart; start++) {
         /* copy the current window into a NUL-terminated scratch buffer */
         c=0;
         while (c<wordLen) {
            window[c]=seq[s][start+c];
            c++;
         }
         window[c]='\0';

         w=0;
         while (w<numWord) {
            if (strncmp(window, word[w], (size_t) wordLen)==0) {
               tally[w]+=1;
               break;
            }
            w++;
         }
      }
   }
   if (window) { free(window); window=NULL; }

   return (tally);
}

/*
 * Convert 16 dimer counts into 1st-order transition probabilities.
 * The counts are taken in consecutive groups of four (one group per leading
 * base); each group is normalised to sum to 1. A zero count is replaced by a
 * pseudo-count of 0.0001 so no transition gets probability zero.
 */
void transition_1st(int *dimerCn,double *dimerFreq) {

   const double pseudo=0.0001;
   int group,b;

   for (group=0; group<4; group++) {
      const int *cnt=dimerCn+4*group;
      double *prob=dimerFreq+4*group;
      double total=0;

      for (b=0; b<4; b++) total += (cnt[b]==0) ? pseudo : cnt[b];
      for (b=0; b<4; b++) prob[b] = ((cnt[b]==0) ? pseudo : (double)cnt[b])/total;
   }
}

/*
 * Convert 64 trimer counts into 2nd-order transition probabilities.
 * The counts are taken in 16 consecutive groups of four (one group per
 * two-base prefix); each group is normalised to sum to 1. A zero count is
 * replaced by a pseudo-count of 0.0001.
 */
void transition_2nd(int *trimerCn,double *trimerFreq) {

   const double pseudo=0.0001;
   int group,b;

   for (group=0; group<16; group++) {
      const int *cnt=trimerCn+4*group;
      double *prob=trimerFreq+4*group;
      double total=0;

      for (b=0; b<4; b++) total += (cnt[b]==0) ? pseudo : cnt[b];
      for (b=0; b<4; b++) prob[b] = ((cnt[b]==0) ? pseudo : (double)cnt[b])/total;
   }
}

/*
 * Convert 256 tetramer counts into 3rd-order transition probabilities.
 * The counts are taken in 64 consecutive groups of four (one group per
 * three-base prefix); each group is normalised to sum to 1. A zero count is
 * replaced by a pseudo-count of 0.0001.
 */
void transition_3rd(int *tetramerCn,double *tetramerFreq) {

   const double pseudo=0.0001;
   int group,b;

   for (group=0; group<64; group++) {
      const int *cnt=tetramerCn+4*group;
      double *prob=tetramerFreq+4*group;
      double total=0;

      for (b=0; b<4; b++) total += (cnt[b]==0) ? pseudo : cnt[b];
      for (b=0; b<4; b++) prob[b] = ((cnt[b]==0) ? pseudo : (double)cnt[b])/total;
   }
}

/*
 * Convert 1024 pentamer counts into 4th-order transition probabilities.
 * The counts are taken in 256 consecutive groups of four (one group per
 * four-base prefix); each group is normalised to sum to 1. A zero count is
 * replaced by a pseudo-count of 0.0001.
 */
void transition_4th(int *pentamerCn,double *pentamerFreq) {

   const double pseudo=0.0001;
   int group,b;

   for (group=0; group<256; group++) {
      const int *cnt=pentamerCn+4*group;
      double *prob=pentamerFreq+4*group;
      double total=0;

      for (b=0; b<4; b++) total += (cnt[b]==0) ? pseudo : cnt[b];
      for (b=0; b<4; b++) prob[b] = ((cnt[b]==0) ? pseudo : (double)cnt[b])/total;
   }
}

/*
 * Convert 4096 hexamer counts into 5th-order transition probabilities.
 * The counts are taken in 1024 consecutive groups of four (one group per
 * five-base prefix); each group is normalised to sum to 1. A zero count is
 * replaced by a pseudo-count of 0.0001.
 */
void transition_5th(int *hexamerCn,double *hexamerFreq) {

   const double pseudo=0.0001;
   int group,b;

   for (group=0; group<1024; group++) {
      const int *cnt=hexamerCn+4*group;
      double *prob=hexamerFreq+4*group;
      double total=0;

      for (b=0; b<4; b++) total += (cnt[b]==0) ? pseudo : cnt[b];
      for (b=0; b<4; b++) prob[b] = ((cnt[b]==0) ? pseudo : (double)cnt[b])/total;
   }
}

/*
 * Turn numCount counts into relative frequencies: freq[i] = count[i] / total.
 *
 *   count     input, numCount entries
 *   numCount  number of entries to process (previously ignored; the
 *             function always processed exactly 4)
 *   freq      output, numCount entries
 *
 * When the counts sum to zero every frequency is set to 0.0 instead of
 * dividing by zero and producing NaN.
 */
void compute_freq(int *count,int numCount,double *freq) {

   register int i;
   double sum;

   /* accumulate in double so a large total cannot overflow int */
   sum=0.0;
   for (i=0; i<numCount; i++) sum+=count[i];

   for (i=0; i<numCount; i++) {
      if (sum!=0.0) freq[i]=(double)count[i]/sum;
      else          freq[i]=0.0;
   }
}

/*
 * Choose the highest Markov order (0..5) that numBase nucleotides can support.
 *
 * The rule is order = round(log4(numBase/4096)), clamped to [0,5]; order k is
 * therefore selected once numBase >= 4096 * 2^(2k-1). The thresholds are
 * compared as integers so the result is exact at the boundaries, and a
 * non-positive numBase yields order 0 (the logarithm form produced NaN for
 * negative input and fell through to order 5).
 */
int select_order(int numBase) {

   static const int threshold[5]={8192,32768,131072,524288,2097152};
   int order;

   order=0;
   while (order<5 && numBase>=threshold[order]) order++;

   return (order);
}


/*
 * Return the total number of characters, over the first seqLen[i] characters
 * of every sequence, that are not the lower-case letter 'n'.
 */
int count_base(int numSeq,int *seqLen,char **seq) {

   int total=0;
   int s;

   for (s=0; s<numSeq; s++) {
      const char *p=seq[s];
      const char *end=p+seqLen[s];

      while (p<end) {
         total += (*p!='n');
         p++;
      }
   }
   return (total);
}

/*
 * Sample numSeq sequences of seqLen independent bases (0th-order model).
 *
 * monomerFreq holds the four base probabilities. Each base code 0..3 is
 * chosen by locating a genrand() draw in the cumulative distribution.
 * Returns a matrix obtained from alloc_int_int(numSeq,seqLen+1); the caller
 * releases it.
 */
int **generate_markov_chain0(double *monomerFreq,int numSeq,int seqLen) {

   int **chain;
   int row,col,sym;
   double draw;
   double cum[5];

   chain=alloc_int_int(numSeq,seqLen+1);

   /* cumulative distribution; the top edge is pinned to 1 */
   cum[0]=0;
   cum[1]=monomerFreq[0];
   cum[2]=monomerFreq[0]+monomerFreq[1];
   cum[3]=monomerFreq[0]+monomerFreq[1]+monomerFreq[2];
   cum[4]=1;

   row=0;
   while (row<numSeq) {
      col=0;
      while (col<seqLen) {
         draw=genrand();
         for (sym=0; sym<4; sym++) {
            if (draw<cum[sym] || draw>cum[sym+1]) continue;
            chain[row][col]=sym;
            break;
         }
         col++;
      }
      row++;
   }
   return (chain);
}
/*
 * Sample numSeq sequences of seqLen bases from a 1st-order Markov model.
 *
 *   monomerFreq  4 base probabilities, used for the first base of a sequence
 *   dimerFreq    16 transition probabilities; entry prev*4+next
 *
 * Fix: the cumulative bins are rebuilt for every position. Previously the
 * monomer bins were computed once before the sequence loop and then
 * overwritten by conditional bins, so every sequence after the first drew its
 * first base from a stale conditional distribution.
 *
 * Returns a matrix obtained from alloc_int_int(numSeq,seqLen+1) holding base
 * codes 0..3; the caller releases it.
 */
int **generate_markov_chain1(double *monomerFreq,double *dimerFreq,int numSeq,int seqLen) {

   enum { ORDER=1 };
   register int ii,pos,c,k;
   int ctxLen,offset,sym,valid;
   int **s1;
   double u;
   double bin[5];
   const double *table[ORDER+1];
   const double *freq;

   table[0]=monomerFreq; table[1]=dimerFreq;

   s1=alloc_int_int(numSeq,seqLen+1);

   for (ii=0; ii<numSeq; ii++) {
      for (pos=0; pos<seqLen; pos++) {
         /* condition on as many previous bases as exist, up to ORDER */
         ctxLen=(pos<ORDER) ? pos : ORDER;

         offset=0; valid=1;
         for (c=pos-ctxLen; c<pos; c++) {
            sym=s1[ii][c];
            if (sym<0 || sym>3) { valid=0; break; }
            offset=offset*4+sym;
         }
         /* as before, nothing is drawn when the context is not a valid base code */
         if (!valid) continue;

         freq=table[ctxLen]+offset*4;
         bin[0]=0;
         bin[1]=freq[0];
         bin[2]=freq[0]+freq[1];
         bin[3]=freq[0]+freq[1]+freq[2];
         bin[4]=1;

         u=genrand();
         for (k=0; k<4; k++) {
            if (u>=bin[k] && u<=bin[k+1]) { s1[ii][pos]=k; break; }
         }
      }
   }
   return (s1);
}
/*
 * Sample numSeq sequences of seqLen bases from a 2nd-order Markov model.
 *
 *   monomerFreq  4 base probabilities, used for the first base
 *   dimerFreq    16 transition probabilities (entry p1*4+next), second base
 *   trimerFreq   64 transition probabilities (entry p2*16+p1*4+next), where
 *                p2 is the base two positions back and p1 the previous base
 *
 * Fixes: the cumulative bins are rebuilt for every position. Previously the
 * monomer bins were computed once before the sequence loop and then
 * overwritten by conditional bins, so every sequence after the first drew its
 * first base from a stale conditional distribution. The start-up positions
 * are also bounded by seqLen instead of being written unconditionally.
 *
 * Returns a matrix obtained from alloc_int_int(numSeq,seqLen+1) holding base
 * codes 0..3; the caller releases it.
 */
int **generate_markov_chain2(double *monomerFreq,double *dimerFreq,double *trimerFreq,int numSeq,int seqLen) {

   enum { ORDER=2 };
   register int ii,pos,c,k;
   int ctxLen,offset,sym,valid;
   int **s1;
   double u;
   double bin[5];
   const double *table[ORDER+1];
   const double *freq;

   table[0]=monomerFreq; table[1]=dimerFreq; table[2]=trimerFreq;

   s1=alloc_int_int(numSeq,seqLen+1);

   for (ii=0; ii<numSeq; ii++) {
      for (pos=0; pos<seqLen; pos++) {
         /* condition on as many previous bases as exist, up to ORDER */
         ctxLen=(pos<ORDER) ? pos : ORDER;

         offset=0; valid=1;
         for (c=pos-ctxLen; c<pos; c++) {
            sym=s1[ii][c];
            if (sym<0 || sym>3) { valid=0; break; }
            offset=offset*4+sym;
         }
         /* as before, nothing is drawn when the context is not a valid base code */
         if (!valid) continue;

         freq=table[ctxLen]+offset*4;
         bin[0]=0;
         bin[1]=freq[0];
         bin[2]=freq[0]+freq[1];
         bin[3]=freq[0]+freq[1]+freq[2];
         bin[4]=1;

         u=genrand();
         for (k=0; k<4; k++) {
            if (u>=bin[k] && u<=bin[k+1]) { s1[ii][pos]=k; break; }
         }
      }
   }
   return (s1);
}
/*
 * Sample numSeq sequences of seqLen bases from a 3rd-order Markov model.
 *
 *   monomerFreq   4 base probabilities, used for the first base
 *   dimerFreq     16 transition probabilities, used for the second base
 *   trimerFreq    64 transition probabilities, used for the third base
 *   tetramerFreq  256 transition probabilities, used from the fourth base on
 *
 * Every table is indexed as context*4+next, where context is the preceding
 * bases read as a base-4 number (oldest base most significant).
 *
 * Fixes: the cumulative bins are rebuilt for every position. Previously the
 * monomer bins were computed once before the sequence loop and then
 * overwritten by conditional bins, so every sequence after the first drew its
 * first base from a stale conditional distribution. The start-up positions
 * are also bounded by seqLen; they used to be written at fixed indices 0..2,
 * which overruns the row when seqLen is less than 2.
 *
 * Returns a matrix obtained from alloc_int_int(numSeq,seqLen+1) holding base
 * codes 0..3; the caller releases it.
 */
int **generate_markov_chain3(double *monomerFreq,double *dimerFreq,double *trimerFreq,double *tetramerFreq,
   int numSeq,int seqLen) {

   enum { ORDER=3 };
   register int ii,pos,c,k;
   int ctxLen,offset,sym,valid;
   int **s1;
   double u;
   double bin[5];
   const double *table[ORDER+1];
   const double *freq;

   table[0]=monomerFreq; table[1]=dimerFreq; table[2]=trimerFreq; table[3]=tetramerFreq;

   s1=alloc_int_int(numSeq,seqLen+1);

   for (ii=0; ii<numSeq; ii++) {
      for (pos=0; pos<seqLen; pos++) {
         /* condition on as many previous bases as exist, up to ORDER */
         ctxLen=(pos<ORDER) ? pos : ORDER;

         offset=0; valid=1;
         for (c=pos-ctxLen; c<pos; c++) {
            sym=s1[ii][c];
            if (sym<0 || sym>3) { valid=0; break; }
            offset=offset*4+sym;
         }
         /* as before, nothing is drawn when the context is not a valid base code */
         if (!valid) continue;

         freq=table[ctxLen]+offset*4;
         bin[0]=0;
         bin[1]=freq[0];
         bin[2]=freq[0]+freq[1];
         bin[3]=freq[0]+freq[1]+freq[2];
         bin[4]=1;

         u=genrand();
         for (k=0; k<4; k++) {
            if (u>=bin[k] && u<=bin[k+1]) { s1[ii][pos]=k; break; }
         }
      }
   }
   return (s1);
}

/*
 * Sample numSeq sequences of seqLen bases from a 4th-order Markov model.
 *
 *   monomerFreq   4 base probabilities, used for the first base
 *   dimerFreq     16 transition probabilities, used for the second base
 *   trimerFreq    64 transition probabilities, used for the third base
 *   tetramerFreq  256 transition probabilities, used for the fourth base
 *   pentamerFreq  1024 transition probabilities, used from the fifth base on
 *
 * Every table is indexed as context*4+next, where context is the preceding
 * bases read as a base-4 number (oldest base most significant).
 *
 * Fixes: the cumulative bins are rebuilt for every position. Previously the
 * monomer bins were computed once before the sequence loop and then
 * overwritten by conditional bins, so every sequence after the first drew its
 * first base from a stale conditional distribution. The start-up positions
 * are also bounded by seqLen; they used to be written at fixed indices 0..3,
 * which overruns the row when seqLen is less than 3.
 *
 * Returns a matrix obtained from alloc_int_int(numSeq,seqLen+1) holding base
 * codes 0..3; the caller releases it.
 */
int **generate_markov_chain4(double *monomerFreq,double *dimerFreq,double *trimerFreq,double *tetramerFreq,double *pentamerFreq,
   int numSeq,int seqLen) {

   enum { ORDER=4 };
   register int ii,pos,c,k;
   int ctxLen,offset,sym,valid;
   int **s1;
   double u;
   double bin[5];
   const double *table[ORDER+1];
   const double *freq;

   table[0]=monomerFreq; table[1]=dimerFreq; table[2]=trimerFreq;
   table[3]=tetramerFreq; table[4]=pentamerFreq;

   s1=alloc_int_int(numSeq,seqLen+1);

   for (ii=0; ii<numSeq; ii++) {
      for (pos=0; pos<seqLen; pos++) {
         /* condition on as many previous bases as exist, up to ORDER */
         ctxLen=(pos<ORDER) ? pos : ORDER;

         offset=0; valid=1;
         for (c=pos-ctxLen; c<pos; c++) {
            sym=s1[ii][c];
            if (sym<0 || sym>3) { valid=0; break; }
            offset=offset*4+sym;
         }
         /* as before, nothing is drawn when the context is not a valid base code */
         if (!valid) continue;

         freq=table[ctxLen]+offset*4;
         bin[0]=0;
         bin[1]=freq[0];
         bin[2]=freq[0]+freq[1];
         bin[3]=freq[0]+freq[1]+freq[2];
         bin[4]=1;

         u=genrand();
         for (k=0; k<4; k++) {
            if (u>=bin[k] && u<=bin[k+1]) { s1[ii][pos]=k; break; }
         }
      }
   }
   return (s1);
}

/*----------------------------------------------------------------------------------------
   Draws one base index (0..3) from four consecutive probabilities freq[0..3].

   The probabilities are turned into cumulative bins [0, f0, f0+f1, f0+f1+f2, 1] and a
   single genrand() value (presumably uniform on [0,1] -- confirm against random.h) is
   located in those bins. The bins are local to each call, so a draw can never be
   affected by the bins of a previous draw.

   Returns the selected base index, or -1 if the random value fell in no bin (possible
   only when the probabilities are malformed, e.g. the partial sums exceed 1).
----------------------------------------------------------------------------------------*/
static int markov5_draw_base(const double *freq) {

   int k;
   double r;
   double bin[5];

   bin[0]=0;
   bin[1]=freq[0];
   bin[2]=freq[0]+freq[1];
   bin[3]=freq[0]+freq[1]+freq[2];
   bin[4]=1;

   r=genrand();
   for (k=0; k<4; k++) {
      if (r>=bin[k] && r<=bin[k+1]) return (k);
   }
   return (-1);
}

/*----------------------------------------------------------------------------------------
   Generates numSeq integer-coded sequences (values 0..3) of length seqLen using a
   5th-order Markov model.

   Position p (p<5) is drawn with an order-p model: position 0 from monomerFreq,
   position 1 from dimerFreq given the previous base, ... position 4 from pentamerFreq
   given the previous four bases. Every position from 5 onwards is drawn from
   hexamerFreq given the previous five bases.

   Table layout used here: for a context of bases c1..cn, the four probabilities of the
   next base are stored consecutively starting at index (c1*4^(n-1)+...+cn)*4.

   Parameters:
      monomerFreq ... hexamerFreq   frequency/transition tables for orders 0..5
      numSeq                        number of sequences to generate
      seqLen                        number of positions generated per sequence

   Returns the matrix obtained from alloc_int_int(numSeq,seqLen+1); only columns
   0..seqLen-1 are written by this function.

   Fixes relative to the previous implementation:
      - the 4th-order start-up step indexed the sequence with a stale/uninitialized
        position counter, so position 4 was never generated;
      - the monomer bins were computed once and then overwritten by later steps, so the
        first base of every sequence after the first came from the wrong distribution;
      - start-up positions are no longer written when seqLen is shorter than 5.
----------------------------------------------------------------------------------------*/
int **generate_markov_chain5(double *monomerFreq,double *dimerFreq,double *trimerFreq,double *tetramerFreq,
   double *pentamerFreq,double *hexamerFreq,int numSeq,int seqLen) {

   int ii,jj,k;
   int order,offset,base,valid;
   int **s1;
   double *table[6];

   /* table[n] is the model used when n preceding bases are available */
   table[0]=monomerFreq;
   table[1]=dimerFreq;
   table[2]=trimerFreq;
   table[3]=tetramerFreq;
   table[4]=pentamerFreq;
   table[5]=hexamerFreq;

   s1=alloc_int_int(numSeq,seqLen+1);

   for (ii=0; ii<numSeq; ii++) {
      for (jj=0; jj<seqLen; jj++) {
         /* the first five positions use as much context as exists so far */
         order=(jj<5)? jj : 5;

         /* encode the preceding 'order' bases as a base-4 number */
         offset=0;
         valid=1;
         for (k=jj-order; k<jj; k++) {
            if (s1[ii][k]<0 || s1[ii][k]>3) { valid=0; break; }
            offset=offset*4+s1[ii][k];
         }
         /* a context holding a non-base value matches no table row: leave the
            position untouched, as the original exhaustive search did */
         if (!valid) continue;

         base=markov5_draw_base(table[order]+offset*4);
         if (base>=0) s1[ii][jj]=base;
      }
   }
   return (s1);
}

