#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif

#include <math.h>
#include <ctype.h>
#include <sys/types.h>
#include <unistd.h>
#include "defines.h"

/* Tiny pseudo-frequency.  transition_*() and marginal_prob() add it to every
   entry before normalizing, and also use it as the threshold at or below
   which a total is treated as empty. */
#define PSEUDO_FREQ	0.000000001

/* Record of one background site.
   NOTE(review): nothing in the visible part of this file uses the struct;
   the field meanings below are inferred from the names only - confirm. */
typedef struct bsite_info BACKG_SITE;
struct bsite_info {
   int seq;        /* presumably the index of the sequence */
   int pos;        /* presumably the position within that sequence */
   double score;   /* presumably the score assigned to the site */
};

/* Called at the end of read_userBackgModel(); not defined in the visible
   part of this file. */
void nonACGT(BACKGROUND_Model *back);
/* transition_kth(marginal,transition): fill the k-th order transition table
   from the (k+1)-mer marginal frequency table (first argument is read,
   second is written). */
void transition_1st(double *,double *);
void transition_2nd(double *,double *);
void transition_3rd(double *,double *);
void transition_4th(double *,double *);
void transition_5th(double *,double *);
void transition_6th(double *,double *);
void transition_7th(double *,double *);
void transition_8th(double *,double *);
void transition_9th(double *,double *);

/* Allocation helpers; not defined in the visible part of this file. */
char *alloc_char(int );
char **alloc_char_char(int ,int );
double *alloc_double(int );
int *alloc_int(int );

/*
 * read_userBackgModel - load k-mer frequencies from a user-supplied file into
 * `back`, derive the transition tables from the marginals and convert the
 * tables that will be used to log scale.
 *
 * Input, one entry per line ('#' in column 0 marks a comment line):
 *    <k-mer><TAB><frequency>   frequency and k-mer text are both stored
 *    <k-mer> <frequency>       only the frequency is stored
 * Entries of a given length are stored in file order: the n-th k-mer of
 * length k goes to slot n of the k-mer table.
 *
 * fileName  path of the background model file
 * back      model whose tables are filled (as allocated by alloc_background())
 *
 * Returns the maximal Markov order, i.e. (longest tab-separated k-mer) - 1.
 * Exits the program if the file cannot be opened, if a space-separated
 * k-mer is longer than 10, or if no usable entry was found.
 *
 * Malformed lines (blank lines, a k-mer without a value, more entries of
 * one length than the table can hold) are skipped instead of crashing or
 * writing out of bounds.
 */
int read_userBackgModel(char *fileName,BACKGROUND_Model *back) {

   FILE *fp;
   char *buffer,*tok,*val;
   const char *delim;
   int *cn,maxOligomer,len,len2,tabFound,maxAllowedOligomer,order,slot;
   register int ii,i;
   double freq,*sum;
   /* destination tables, indexed by (k-mer length - 1) */
   double *freqTab[10]={
      back->monomerFreq, back->dimerFreq,   back->trimerFreq,   back->tetramerFreq,
      back->pentamerFreq,back->hexamerFreq, back->heptamerFreq, back->octamerFreq,
      back->nonamerFreq, back->decamerFreq
   };
   char **kmerTab[10]={
      back->monomer,  back->dimer,   back->trimer,   back->tetramer,
      back->pentamer, back->hexamer, back->heptamer, back->octamer,
      back->nonamer,  back->decamer
   };

   fp=fopen(fileName,"r");
   if (!fp) { perror(fileName); exit(0); }

   printf("\nreading background model file:\t%s\n",fileName);

   maxAllowedOligomer=MAX_ALLOWED_ORDER+1;
   buffer=alloc_char(256);
   sum=alloc_double(maxAllowedOligomer);
   cn=alloc_int(maxAllowedOligomer);

   for (i=0; i<maxAllowedOligomer; i++) { cn[i]=0; sum[i]=0; }
   maxOligomer=0;
   while (fgets(buffer,256,fp)!=NULL) {
      if (buffer[0]=='#') continue;

      // strip the line terminator only if there is one, so a last line
      // without a newline keeps its final character
      len=strlen(buffer);
      while (len>0 && (buffer[len-1]=='\n' || buffer[len-1]=='\r')) buffer[--len]='\0';

      tabFound=(strchr(buffer,'\t')!=NULL);
      delim=tabFound?"\t":" ";

      tok=strtok(buffer,delim);
      if (tok==NULL) continue;                      // blank line
      len2=strlen(tok);

      if (!tabFound && len2>10) { printf("Error: up to 9th order is allowed!\n"); exit(0); }
      if (len2>maxAllowedOligomer) continue;        // also keeps sum[]/cn[] in bounds

      val=strtok(0,delim);
      if (val==NULL) continue;                      // k-mer without a frequency
      freq=atof(val);

      // Only tab-separated entries raise the maximal k-mer length, as before.
      // NOTE(review): a purely space-separated file therefore ends in the
      // "no frequencies" error below - confirm that this is intended.
      if (tabFound && len2>maxOligomer) maxOligomer=len2;

      if (len2>10) continue;                        // no table beyond decamers

      slot=cn[len2-1];
      if (slot>=(1<<(2*len2))) {                    // table holds 4^len2 entries
         printf("Warning: more than %d k-mers of length %d in %s; extra entry ignored\n",
            1<<(2*len2),len2,fileName);
         continue;
      }

      freqTab[len2-1][slot]=freq;
      if (tabFound) strcpy(kmerTab[len2-1][slot],tok);   // len2 chars + '\0' fit one row
      sum[len2-1]+=freq;
      cn[len2-1]++;
   }
   fclose(fp);

   printf("maximal k-mer read: %d - maximal Markov order: %d\n\n",maxOligomer,maxOligomer-1);

   // check probability sum
   for (i=0; i<maxOligomer; i++) {
      if (fabs(sum[i]-1.0)>0.001) printf("sum of marginal %d: %8.6f\n",i+1,sum[i]); 
   }
   if (maxOligomer==0) { printf("Error: no frequencies in %s\n",fileName); exit(0); }

   // Compute transitions from marginals, lowest order first, stopping once
   // `order` reaches maxOligomer-1.  With maxOligomer==1 the stop test never
   // matches, so all nine tables are computed in the single pass.
   order=0;
   for (ii=0; ii<maxOligomer; ii++) {
      transition_1st(back->dimerFreq,back->transition1);
      order++;
      if (order==maxOligomer-1) break;
      transition_2nd(back->trimerFreq,back->transition2);
      order++;
      if (order==maxOligomer-1) break;
      transition_3rd(back->tetramerFreq,back->transition3);
      order++;
      if (order==maxOligomer-1) break;
      transition_4th(back->pentamerFreq,back->transition4);
      order++;
      if (order==maxOligomer-1) break;
      transition_5th(back->hexamerFreq,back->transition5);
      order++;
      if (order==maxOligomer-1) break;
      transition_6th(back->heptamerFreq,back->transition6);
      order++;
      if (order==maxOligomer-1) break;
      transition_7th(back->octamerFreq,back->transition7);
      order++;
      if (order==maxOligomer-1) break;
      transition_8th(back->nonamerFreq,back->transition8);
      order++;
      if (order==maxOligomer-1) break;
      transition_9th(back->decamerFreq,back->transition9);
      order++;
      if (order==maxOligomer-1) break;
   }

   // Convert to log scale, pairing the k-mer marginal with the k-th order
   // transition table, with the same stop rule as above.
   order=0;
   for (ii=0; ii<maxOligomer; ii++) {
      for (i=0; i<4; i++)  back->monomerFreq[i]=log(back->monomerFreq[i]); // log marginal
      for (i=0; i<16; i++) back->transition1[i]=log(back->transition1[i]); // log transition
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<16; i++) back->dimerFreq[i]  =log(back->dimerFreq[i]);
      for (i=0; i<64; i++) back->transition2[i]=log(back->transition2[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<64; i++)  back->trimerFreq[i] =log(back->trimerFreq[i]);
      for (i=0; i<256; i++) back->transition3[i]=log(back->transition3[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<256; i++)  back->tetramerFreq[i]=log(back->tetramerFreq[i]);
      for (i=0; i<1024; i++) back->transition4[i] =log(back->transition4[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<1024; i++) back->pentamerFreq[i]=log(back->pentamerFreq[i]);
      for (i=0; i<4096; i++) back->transition5[i] =log(back->transition5[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<4096; i++)  back->hexamerFreq[i]=log(back->hexamerFreq[i]);
      for (i=0; i<16384; i++) back->transition6[i]=log(back->transition6[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<16384; i++) back->heptamerFreq[i]=log(back->heptamerFreq[i]);
      for (i=0; i<65536; i++) back->transition7[i] =log(back->transition7[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<65536; i++)  back->octamerFreq[i]=log(back->octamerFreq[i]);
      for (i=0; i<262144; i++) back->transition8[i]=log(back->transition8[i]);
      order++;
      if (order==maxOligomer-1) break;

      for (i=0; i<262144; i++)  back->nonamerFreq[i]=log(back->nonamerFreq[i]);
      for (i=0; i<1048576; i++) back->transition9[i]=log(back->transition9[i]);
      order++;
      if (order==maxOligomer-1) break;
   }
   nonACGT(back);

   free(buffer);
   free(sum);
   free(cn);

   return (maxOligomer-1);
}

/*
 * alloc_background - allocate a BACKGROUND_Model together with all of its
 * marginal-frequency, transition and k-mer text tables.
 *
 * Each numeric table holds 4^k entries plus one extra trailing slot; the
 * scoring code in this file reads that slot (e.g. monomerFreq[4],
 * transition1[16]) for windows containing a non-ACGT character.
 *
 * Returns the new model; release it with destroy_background().
 * Exits the program if the model structure itself cannot be allocated.
 */
BACKGROUND_Model *alloc_background(void) {

   BACKGROUND_Model *back;

   back=(BACKGROUND_Model *)calloc(1,sizeof(BACKGROUND_Model));
   if (!back) { perror("alloc_background"); exit(EXIT_FAILURE); }

   back->monomerFreq=alloc_double(4+1);       // marginal
   back->dimerFreq=alloc_double(16+1);        // marginal
   back->trimerFreq=alloc_double(64+1);       // marginal
   back->tetramerFreq=alloc_double(256+1);    // marginal
   back->pentamerFreq=alloc_double(1024+1);   // marginal
   back->hexamerFreq=alloc_double(4096+1);    // marginal
   back->heptamerFreq=alloc_double(16384+1);  // marginal
   back->octamerFreq=alloc_double(65536+1);   // marginal
   back->nonamerFreq=alloc_double(262144+1);  // marginal
   // The decamer table was the only one without the extra slot.  It is added
   // here for consistency with the others - presumably nonACGT() fills that
   // slot in every table; confirm.
   back->decamerFreq=alloc_double(1048576+1); // marginal

   back->transition1=alloc_double(16+1);      // 1st order
   back->transition2=alloc_double(64+1);      // 2nd order
   back->transition3=alloc_double(256+1);     // 3rd order
   back->transition4=alloc_double(1024+1);    // 4th order
   back->transition5=alloc_double(4096+1);    // 5th order
   back->transition6=alloc_double(16384+1);   // 6th order
   back->transition7=alloc_double(65536+1);   // 7th order
   back->transition8=alloc_double(262144+1);  // 8th order
   back->transition9=alloc_double(1048576+1); // 9th order

   // k-mer text: 4^k rows of k characters plus the terminating '\0'
   back->monomer=alloc_char_char(4,2);
   back->dimer=alloc_char_char(16,3);
   back->trimer=alloc_char_char(64,4);
   back->tetramer=alloc_char_char(256,5);
   back->pentamer=alloc_char_char(1024,6);
   back->hexamer=alloc_char_char(4096,7);
   back->heptamer=alloc_char_char(16384,8);
   back->octamer=alloc_char_char(65536,9);
   back->nonamer=alloc_char_char(262144,10);
   back->decamer=alloc_char_char(1048576,11);

   return (back);
}

/* Release one k-mer text table: the storage hanging off row 0, then the
   array of row pointers (the same two frees the original code performed).
   A NULL table is ignored. */
static void free_kmer_table(char **table) {

   if (!table) return;
   free(table[0]);
   free(table);
}

/*
 * destroy_background - free a model created by alloc_background() and every
 * table it owns.  A NULL `back` is ignored.  The pointer must not be used
 * after this call.
 */
void destroy_background(BACKGROUND_Model *back) {

   if (!back) return;

   /* marginal frequency tables (free(NULL) is a no-op) */
   free(back->monomerFreq);
   free(back->dimerFreq);
   free(back->trimerFreq);
   free(back->tetramerFreq);
   free(back->pentamerFreq);
   free(back->hexamerFreq);
   free(back->heptamerFreq);
   free(back->octamerFreq);
   free(back->nonamerFreq);   /* previously never released */
   free(back->decamerFreq);

   /* transition tables */
   free(back->transition1);
   free(back->transition2);
   free(back->transition3);
   free(back->transition4);
   free(back->transition5);
   free(back->transition6);
   free(back->transition7);
   free(back->transition8);
   free(back->transition9);

   /* k-mer text tables */
   free_kmer_table(back->monomer);
   free_kmer_table(back->dimer);
   free_kmer_table(back->trimer);
   free_kmer_table(back->tetramer);
   free_kmer_table(back->pentamer);
   free_kmer_table(back->hexamer);
   free_kmer_table(back->heptamer);
   free_kmer_table(back->octamer);
   free_kmer_table(back->nonamer);
   free_kmer_table(back->decamer);

   free(back);
}

/*
 * transition_1st - derive the 1st order transition table from dimer
 * frequencies.
 *
 * dimerFreq is read as 4 consecutive groups of 4 entries.  Within a group,
 * each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a group
 * whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 16 entries of transition1.
 */
void transition_1st(double *dimerFreq,double *transition1) {

   int ctx,b;

   for (ctx=0; ctx<4; ctx++) {
      const double *in=dimerFreq+4*ctx;
      double *out=transition1+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_2nd - derive the 2nd order transition table from trimer
 * frequencies.
 *
 * trimerFreq is read as 16 consecutive groups of 4 entries.  Within a group,
 * each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a group
 * whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 64 entries of transition2.
 */
void transition_2nd(double *trimerFreq,double *transition2) {

   int ctx,b;

   for (ctx=0; ctx<16; ctx++) {
      const double *in=trimerFreq+4*ctx;
      double *out=transition2+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}


/*
 * transition_3rd - derive the 3rd order transition table from tetramer
 * frequencies.
 *
 * tetramerFreq is read as 64 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 256 entries of transition3.
 */
void transition_3rd(double *tetramerFreq,double *transition3) {

   int ctx,b;

   for (ctx=0; ctx<64; ctx++) {
      const double *in=tetramerFreq+4*ctx;
      double *out=transition3+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_4th - derive the 4th order transition table from pentamer
 * frequencies.
 *
 * pentamerFreq is read as 256 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 1024 entries of transition4.
 */
void transition_4th(double *pentamerFreq,double *transition4) {

   int ctx,b;

   for (ctx=0; ctx<256; ctx++) {
      const double *in=pentamerFreq+4*ctx;
      double *out=transition4+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_5th - derive the 5th order transition table from hexamer
 * frequencies.
 *
 * hexamerFreq is read as 1024 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 4096 entries of transition5.
 */
void transition_5th(double *hexamerFreq,double *transition5) {

   int ctx,b;

   for (ctx=0; ctx<1024; ctx++) {
      const double *in=hexamerFreq+4*ctx;
      double *out=transition5+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_6th - derive the 6th order transition table from heptamer
 * frequencies.
 *
 * heptamerFreq is read as 4096 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 16384 entries of transition6.
 */
void transition_6th(double *heptamerFreq,double *transition6) {

   int ctx,b;

   for (ctx=0; ctx<4096; ctx++) {
      const double *in=heptamerFreq+4*ctx;
      double *out=transition6+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_7th - derive the 7th order transition table from octamer
 * frequencies.
 *
 * octamerFreq is read as 16384 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 65536 entries of transition7.
 */
void transition_7th(double *octamerFreq,double *transition7) {

   int ctx,b;

   for (ctx=0; ctx<16384; ctx++) {
      const double *in=octamerFreq+4*ctx;
      double *out=transition7+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_8th - derive the 8th order transition table from nonamer
 * frequencies.
 *
 * nonamerFreq is read as 65536 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 262144 entries of transition8.
 */
void transition_8th(double *nonamerFreq,double *transition8) {

   int ctx,b;

   for (ctx=0; ctx<65536; ctx++) {
      const double *in=nonamerFreq+4*ctx;
      double *out=transition8+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * transition_9th - derive the 9th order transition table from decamer
 * frequencies.
 *
 * decamerFreq is read as 262144 consecutive groups of 4 entries.  Within a
 * group, each output is (PSEUDO_FREQ+entry)/(4*PSEUDO_FREQ+group total); a
 * group whose total is at or below PSEUDO_FREQ is set to 0.25 throughout.
 * Writes 1048576 entries of transition9.
 */
void transition_9th(double *decamerFreq,double *transition9) {

   int ctx,b;

   for (ctx=0; ctx<262144; ctx++) {
      const double *in=decamerFreq+4*ctx;
      double *out=transition9+4*ctx;
      double total=0;

      for (b=0; b<4; b++) total +=in[b];

      if (total<=PSEUDO_FREQ) {
         for (b=0; b<4; b++) out[b]=0.25;
      }
      else {
         for (b=0; b<4; b++) out[b]=(PSEUDO_FREQ+in[b])/(4*PSEUDO_FREQ+total);
      }
   }
}

/*
 * marginal_prob - turn k-mer counts into smoothed frequencies.
 *
 * count    numKmer observed counts
 * numKmer  number of entries in count and freq
 * freq     output: (PSEUDO_FREQ+count[k])/(total+numKmer*PSEUDO_FREQ)
 *
 * Prints an error and exits the program when the total count is at or
 * below PSEUDO_FREQ.
 */
void marginal_prob(int *count,int numKmer,double *freq) {

   int k;
   double total,denom;

   total=0;
   for (k=0; k<numKmer; k++) total +=(double)count[k];

   if (total<=PSEUDO_FREQ) {
      printf("Error: data contains no [a,c,g,t].\n");
      exit(0);
   }

   denom=total+numKmer*PSEUDO_FREQ;
   for (k=0; k<numKmer; k++) {
      freq[k]=(PSEUDO_FREQ+(double)count[k])/denom;
   }
}

void weightscore_backg_markov(int numSeq,SAMPLE *data,MODEL *model,B_LIKELIHOOD *ll,
   BACKGROUND_Model *back,int MarkovOrder,int two_motif_model) {

   register int i,j,k,m;
   int numPWM;
   int *base;
   double tmpFactor;

   if (two_motif_model) {
      base=alloc_int(max(model->pwmLen[0],model->pwmLen[1]));
      numPWM=2; 
   }
   else {
      base=alloc_int(model->pwmLen[0]);
      numPWM=1; 
   }

   for (i=0; i<numSeq; i++) {
      ll[i].seq_b=0; 

      base=alloc_int(data[i].length);
      for (j=0; j<data[i].length; j++) {
         switch(data[i].seq[j]) {
            case 'a': base[j]=0;  break; 
            case 'c': base[j]=1;  break; 
            case 'g': base[j]=2;  break; 
            case 't': base[j]=3;  break; 
            default:  base[j]=-1; break; 
         } 
      }
  
      if (MarkovOrder==1) {
         if (base[0]==-1) ll[i].seq_b +=back->monomerFreq[4];        // all marginal and transitional log-ed
         else             ll[i].seq_b +=back->monomerFreq[base[0]];  // all marginal and transitional log-ed
      
         for (k=0; k<data[i].length-1; k++) {
            if (base[k]==-1 || base[k+1]==-1) ll[i].seq_b +=back->transition1[16]; // transitional prob. all to 0.25
            else ll[i].seq_b +=back->transition1[base[k]*4+base[k+1]];
         }
      }
      else if (MarkovOrder==2) {
         if (base[0]==-1 || base[1]==-1) ll[i].seq_b +=back->dimerFreq[16]; // marginal
         else ll[i].seq_b +=back->dimerFreq[base[0]*4+base[1]];
   
         for (k=0; k<data[i].length-2; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1) ll[i].seq_b +=back->transition2[64]; //transitional
            else ll[i].seq_b +=back->transition2[base[k]*16+base[k+1]*4+base[k+2]];
         }
      }
      else if (MarkovOrder==3) {
         if (base[0]==-1||base[1]==-1||base[2]==-1) 
            ll[i].seq_b +=back->trimerFreq[64]; //marginal
         else 
            ll[i].seq_b +=back->trimerFreq[base[0]*16+base[1]*4+base[2]];
   
         for (k=0; k<data[i].length-3; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1) 
               ll[i].seq_b +=back->transition3[256]; //transitional 
            else 
               ll[i].seq_b +=back->transition3[base[k]*64+base[k+1]*16+base[k+2]*4+base[k+3]];
         }
      } 
      else if (MarkovOrder==4) {
         if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1) 
            ll[i].seq_b +=back->tetramerFreq[256]; //marginal 
         else 
            ll[i].seq_b +=back->tetramerFreq[base[0]*64+base[1]*16+base[2]*4+base[3]];
   
         for (k=0; k<data[i].length-4; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1) 
               ll[i].seq_b +=back->transition4[1024]; // transitional
            else 
               ll[i].seq_b +=back->transition4[base[k]*256+base[k+1]*64+base[k+2]*16+base[k+3]*4+base[k+4]];
         }
      } 
      else if (MarkovOrder==5) {
         if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1) 
            ll[i].seq_b +=back->pentamerFreq[1024]; //marginal 
         else 
            ll[i].seq_b +=back->pentamerFreq[base[0]*256+base[1]*64+base[2]*16+base[3]*4+base[4]];
   
         for (k=0; k<data[i].length-5; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1) 
               ll[i].seq_b +=back->transition5[4096]; 
            else 
               ll[i].seq_b +=back->transition5[base[k]*1024+base[k+1]*256+base[k+2]*64+base[k+3]*16+base[k+4]*4+base[k+5]];
         }
      }
      else if (MarkovOrder==6) {
         if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1) 
            ll[i].seq_b +=back->hexamerFreq[4096]; 
         else 
            ll[i].seq_b +=back->hexamerFreq[base[0]*1024+base[1]*256+base[2]*64+base[3]*16+base[4]*4+base[5]];
   
         for (k=0; k<data[i].length-6; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1) 
               ll[i].seq_b +=back->transition6[16384]; 
            else 
               ll[i].seq_b +=back->transition6[base[k]*4096+base[k+1]*1024+base[k+2]*256+base[k+3]*64+base[k+4]*16+base[k+5]*4+base[k+6]];
         }
      }
      else if (MarkovOrder==7) {
         if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1||base[6]==-1) 
            ll[i].seq_b +=back->heptamerFreq[16384]; 
         else 
            ll[i].seq_b +=back->heptamerFreq[base[0]*4096+base[1]*1024+base[2]*256+base[3]*64+base[4]*16+base[5]*4+base[6]];
   
         for (k=0; k<data[i].length-7; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1||base[k+7]==-1) 
               ll[i].seq_b +=back->transition7[65536]; 
            else 
               ll[i].seq_b +=back->transition7[base[k]*16384+base[k+1]*4096+base[k+2]*1024+base[k+3]*256+base[k+4]*64+base[k+5]*16+base[k+6]*4+base[k+7]];
         }
      }
      else if (MarkovOrder==8) {
         if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1||base[6]==-1||base[7]==-1) 
            ll[i].seq_b +=back->octamerFreq[65536]; 
         else 
            ll[i].seq_b+=back->octamerFreq[base[0]*16384+base[1]*4096+base[2]*1024+base[3]*256+base[4]*64+base[5]*16+base[6]*4+base[7]];
   
         for (k=0; k<data[i].length-8; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1||base[k+7]==-1||base[k+8]==-1) 
               ll[i].seq_b +=back->transition8[262144]; 
            else 
               ll[i].seq_b +=back->transition8[base[k]*65536+base[k+1]*16384+base[k+2]*4096+base[k+3]*1024+base[k+4]*256+base[k+5]*64+base[k+6]*16+base[k+7]*4+base[k+8]];
         }
      }
      else if (MarkovOrder==9) {
         if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1||base[6]==-1||base[7]==-1||base[8]==-1) 
            ll[i].seq_b +=back->nonamerFreq[262144]; 
         else 
            ll[i].seq_b+=back->nonamerFreq[base[0]*65536+base[1]*16384+base[2]*4096+base[3]*1024+base[4]*256+base[5]*64+base[6]*16+base[7]*4+base[8]];
   
         for (k=0; k<data[i].length-9; k++) {
            if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1||base[k+7]==-1||base[k+8]==-1||base[k+9]==-1) 
               ll[i].seq_b +=back->transition9[1048576]; 
            else 
               ll[i].seq_b +=back->transition9[base[k]*262144+base[k+1]*65536+base[k+2]*16384+base[k+3]*4096+base[k+4]*1024+base[k+5]*256+base[k+6]*64+base[k+7]*16+base[k+8]*4+base[k+9]];
         }
      }
      else {
         for (k=0; k<data[i].length; k++) {
            switch (data[i].seq[k]) {
               case 'a': ll[i].seq_b +=back->monomerFreq[0]; break; 
               case 'c': ll[i].seq_b +=back->monomerFreq[1]; break; 
               case 'g': ll[i].seq_b +=back->monomerFreq[2]; break; 
               case 't': ll[i].seq_b +=back->monomerFreq[3]; break; 
               default:  ll[i].seq_b +=back->monomerFreq[4]; break;
            } 
         } 
      }
      // printf("seq prob: %10.8f\n",ll[i].seq_b);

      /*-----------------------------------------------*/
      tmpFactor=ll[i].seq_b;
      for (j=0; j<10000; j++) {
         tmpFactor=ll[i].seq_b+j*10;
         if (tmpFactor>1e-10) {
            ll[i].seq_b +=j*10;
            break; 
         }
      }
      /*------------------------------------------------*/
 
      // printf("seq prob: %10.8f\n",ll[i].seq_b);
      if (base) { free(base); base=NULL; }
   }

   if (two_motif_model) base=alloc_int(max(model->pwmLen[0],model->pwmLen[1]));
   else base=alloc_int(model->pwmLen[0]);

   for (i=0; i<numSeq; i++) {
      for (m=0; m<numPWM; m++) {
         for (j=0; j<data[i].length-model->pwmLen[m]+1; j++) {
   
            ll[i].site_b[m][j]=0;

            for (k=0; k<model->pwmLen[m]; k++) {
               switch(data[i].seq[j+k]) {
                  case 'a': base[k]=0;  break; 
                  case 'c': base[k]=1;  break; 
                  case 'g': base[k]=2;  break; 
                  case 't': base[k]=3;  break; 
                  default:  base[k]=-1; break; 
               } 
            }
   
            if (MarkovOrder==1) {
               if (base[0]==-1) ll[i].site_b[m][j] +=back->monomerFreq[4];        // all marginal and transitional log-ed
               else             ll[i].site_b[m][j] +=back->monomerFreq[base[0]];  // all marginal and transitional log-ed
         
               for (k=0; k<model->pwmLen[m]-1; k++) {
                  if (base[k]==-1 || base[k+1]==-1) ll[i].site_b[m][j] +=back->transition1[16]; // transitional prob. all to 0.25
                  else ll[i].site_b[m][j] +=back->transition1[base[k]*4+base[k+1]];
               }
            }
            else if (MarkovOrder==2) {
               if (base[0]==-1 || base[1]==-1) ll[i].site_b[m][j] +=back->dimerFreq[16]; // marginal
               else ll[i].site_b[m][j] +=back->dimerFreq[base[0]*4+base[1]];
   
               for (k=0; k<model->pwmLen[m]-2; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1) ll[i].site_b[m][j] +=back->transition2[64]; //transitional
                  else ll[i].site_b[m][j] +=back->transition2[base[k]*16+base[k+1]*4+base[k+2]];
               }
            }
            else if (MarkovOrder==3) {
               if (base[0]==-1||base[1]==-1||base[2]==-1) 
                  ll[i].site_b[m][j] +=back->trimerFreq[64]; //marginal
               else 
                  ll[i].site_b[m][j] +=back->trimerFreq[base[0]*16+base[1]*4+base[2]];
   
               for (k=0; k<model->pwmLen[m]-3; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1) 
                     ll[i].site_b[m][j] +=back->transition3[256]; //transitional 
                  else 
                     ll[i].site_b[m][j] +=back->transition3[base[k]*64+base[k+1]*16+base[k+2]*4+base[k+3]];
               }
            } 
            else if (MarkovOrder==4) {
               if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1) 
                  ll[i].site_b[m][j] +=back->tetramerFreq[256]; //marginal 
               else 
                  ll[i].site_b[m][j] +=back->tetramerFreq[base[0]*64+base[1]*16+base[2]*4+base[3]];
   
               for (k=0; k<model->pwmLen[m]-4; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1) 
                     ll[i].site_b[m][j] +=back->transition4[1024]; // transitional
                  else 
                     ll[i].site_b[m][j] +=back->transition4[base[k]*256+base[k+1]*64+base[k+2]*16+base[k+3]*4+base[k+4]];
               }
            } 
            else if (MarkovOrder==5) {
               if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1) 
                  ll[i].site_b[m][j] +=back->pentamerFreq[1024]; //marginal 
               else 
                  ll[i].site_b[m][j] +=back->pentamerFreq[base[0]*256+base[1]*64+base[2]*16+base[3]*4+base[4]];
   
               for (k=0; k<model->pwmLen[m]-5; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1) 
                     ll[i].site_b[m][j] +=back->transition5[4096]; 
                  else 
                     ll[i].site_b[m][j] +=back->transition5[base[k]*1024+base[k+1]*256+base[k+2]*64+base[k+3]*16+base[k+4]*4+base[k+5]];
               }
            }
            else if (MarkovOrder==6) {
               if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1) 
                  ll[i].site_b[m][j] +=back->hexamerFreq[4096]; 
               else 
                  ll[i].site_b[m][j] +=back->hexamerFreq[base[0]*1024+base[1]*256+base[2]*64+base[3]*16+base[4]*4+base[5]];
   
               for (k=0; k<model->pwmLen[m]-6; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1) 
                     ll[i].site_b[m][j] +=back->transition6[16384]; 
                  else 
                     ll[i].site_b[m][j] +=back->transition6[base[k]*4096+base[k+1]*1024+base[k+2]*256+base[k+3]*64+base[k+4]*16+base[k+5]*4+base[k+6]];
               }
            }
            else if (MarkovOrder==7) {
               if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1||base[6]==-1) 
                  ll[i].site_b[m][j] +=back->heptamerFreq[16384]; 
               else 
                  ll[i].site_b[m][j] +=back->heptamerFreq[base[0]*4096+base[1]*1024+base[2]*256+base[3]*64+base[4]*16+base[5]*4+base[6]];
   
               for (k=0; k<model->pwmLen[m]-7; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1||base[k+7]==-1) 
                     ll[i].site_b[m][j] +=back->transition7[65536]; 
                  else 
                     ll[i].site_b[m][j] +=back->transition7[base[k]*16384+base[k+1]*4096+base[k+2]*1024+base[k+3]*256+base[k+4]*64+base[k+5]*16+base[k+6]*4+base[k+7]];
               }
            }
            else if (MarkovOrder==8) {
               if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1||base[6]==-1||base[7]==-1) 
                  ll[i].site_b[m][j] +=back->octamerFreq[65536]; 
               else 
                  ll[i].site_b[m][j]+=back->octamerFreq[base[0]*16384+base[1]*4096+base[2]*1024+base[3]*256+base[4]*64+base[5]*16+base[6]*4+base[7]];
   
               for (k=0; k<model->pwmLen[m]-8; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1||base[k+7]==-1||base[k+8]==-1) 
                     ll[i].site_b[m][j] +=back->transition8[262144]; 
                  else 
                     ll[i].site_b[m][j] +=back->transition8[base[k]*65536+base[k+1]*16384+base[k+2]*4096+base[k+3]*1024+base[k+4]*256+base[k+5]*64+base[k+6]*16+base[k+7]*4+base[k+8]];
               }
            }
            else if (MarkovOrder==9) {
               if (base[0]==-1||base[1]==-1||base[2]==-1||base[3]==-1||base[4]==-1||base[5]==-1||base[6]==-1||base[7]==-1||base[8]==-1)
                  ll[i].site_b[m][j] +=back->nonamerFreq[262144];
               else
                  ll[i].site_b[m][j]+=back->nonamerFreq[base[0]*65536+base[1]*16384+base[2]*4096+base[3]*1024+base[4]*256+base[5]*64+base[6]*16+base[7]*4+base[8]];

               for (k=0; k<model->pwmLen[m]-9; k++) {
                  if (base[k]==-1||base[k+1]==-1||base[k+2]==-1||base[k+3]==-1||base[k+4]==-1||base[k+5]==-1||base[k+6]==-1||base[k+7]==-1||base[k+8]==-1||base[k+9]==-1)
                     ll[i].site_b[m][j] +=back->transition9[1048576];
                  else
                     ll[i].site_b[m][j] +=back->transition9[base[k]*262144+base[k+1]*65536+base[k+2]*16384+base[k+3]*4096+base[k+4]*1024+base[k+5]*256+base[k+6]*64+base[k+7]*16+base[k+8]*4+base[k+9]];
               }
            }

            else {
               // default: 0th order
               for (k=0; k<model->pwmLen[m]; k++) {
                  switch (data[i].seq[j+k]) {
                     case 'a': ll[i].site_b[m][j] +=back->monomerFreq[0]; break; 
                     case 'c': ll[i].site_b[m][j] +=back->monomerFreq[1]; break; 
                     case 'g': ll[i].site_b[m][j] +=back->monomerFreq[2]; break; 
                     case 't': ll[i].site_b[m][j] +=back->monomerFreq[3]; break; 
                     default:  ll[i].site_b[m][j] +=back->monomerFreq[4]; break;
                  } 
               } 
            } 
         } 
      }
   }
   if (base) { free(base); base=NULL; }
}

/*
 * nonACGT - fill the fallback slot of every background table.
 *
 * The k-mer and transition tables are indexed by the base-4 encoding of the
 * bases (a=0, c=1, g=2, t=3).  The scoring code uses one additional entry,
 * located right after the last encoded index (index 4^k for a k-mer table,
 * index 4^(n+1) for the order-n transition table), whenever any base in the
 * window is not a/c/g/t.  This routine sets those entries:
 *
 *   - k-mer (marginal) tables:  log(1/4^k), i.e. a uniform k-mer probability
 *   - transition tables:        log(0.25),  i.e. a uniform next-base probability
 *
 * All values are stored as natural logarithms, matching how the tables are
 * summed by the scoring code.
 *
 * back: background model whose tables are updated in place.  Each table must
 *       already be allocated with room for the extra entry.
 */
void nonACGT(BACKGROUND_Model *back) {

   /* marginal k-mer log-probabilities for windows containing a non-acgt base */
   back->monomerFreq[4]     =-log(4.0);
   back->dimerFreq[16]      =-log(16.0);
   back->trimerFreq[64]     =-log(64.0);
   back->tetramerFreq[256]  =-log(256.0);
   back->pentamerFreq[1024] =-log(1024.0);
   back->hexamerFreq[4096]  =-log(4096.0);
   back->heptamerFreq[16384]=-log(16384.0);
   back->octamerFreq[65536] =-log(65536.0);
   /* 9-mer fallback belongs in nonamerFreq (was mistakenly written to
      octamerFreq[262144], leaving the slot read by the scoring code unset) */
   back->nonamerFreq[262144]=-log(262144.0);

   /* transition log-probabilities for windows containing a non-acgt base */
   back->transition1[16]    = log(0.25);
   back->transition2[64]    = log(0.25);
   back->transition3[256]   = log(0.25);
   back->transition4[1024]  = log(0.25);
   back->transition5[4096]  = log(0.25);
   back->transition6[16384] = log(0.25);
   back->transition7[65536] = log(0.25);
   back->transition8[262144]= log(0.25);
   back->transition9[1048576]=log(0.25);
}

