/*
//>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
//ART_454 -- Artificial Read Transcriber 
//Copyright(c) 2008-2011 Weichun Huang, All Rights Reserved.
//<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
*/
#include <iostream>
#include <sstream>
#include <string>
#include <time.h>
#include <algorithm>
#include <iomanip>
#include <ctime>
#include "art.h"
#include "readSeqFile.h"

using namespace std;

// Program name shown in the usage/help text printed by main().
#define PRGNAME "art_454"
// Lower bound used for paired-end simulation: main() aborts with an error when
// the mean fragment length is less than or equal to this value.
// ("FLAGMENT" is a misspelling of "FRAGMENT"; the identifier is left unchanged
// because main() refers to it by this name.)
#define MINIMUM_FLAGMENT_SIZE 100

int main(int argc, char* argv[]){
    cout <<"==================================================================="<<endl;
    cout <<"                      ART_454 (Version 1.2.5)                      "<<endl; 
    cout <<"                   Simulation of 454 Pyrosequencing                "<<endl; 
    cout <<"     Copyright (c) 2008-2011, Weichun Huang. All Rights Reserved.  "<<endl; 
    cout <<"==================================================================="<<endl<<endl;

    string profile_name = "";
    int i=1;
    for(;i<argc;++i){
	    char* pch = argv[i];
	    if( *pch != '-' || *(pch+1) == '\0') break;
	    while(*++pch){
		    switch(*pch){
			    case 'p':
			    case 'P':
				    if(i<argc) profile_name=argv[++i]; 
				    break;
			    default:
				   cerr<<"Error: unreconized option \""<<*pch<< "\""<<endl;
				   break;
		    }
	    }
    }

    int k=argc-i+1;
    if(k !=4 &&  k !=6){
        cout<<"USAGE FOR SINGLE-END READ SIMULATION"<<endl<<endl;
        cout<<"     "<< PRGNAME <<" [ -p read_profile ] <INPUT_SEQ_FILE> <OUTPUT_FILE_PREFIX> <FOLD_COVERAGE>"<<endl<<endl;
       	cout<<"     Example:"<<endl;
       	cout<<"             "<<PRGNAME <<" seq_reference.fa ./outdir/dat_single_end 20"<<endl<<endl;
        cout<<"USAGE FOR PAIRED-END READ SIMULATION"<<endl<<endl;
        cout<<"     "<< PRGNAME <<" [ -p read_profile ] <INPUT_SEQ_FILE> <OUTPUT_FILE_PREFIX> <FOLD_COVERAGE> <MEAN_FRAG_LEN> <STD_DEV>"<<endl<<endl;
        cout<<"     Example:"<<endl;
       	cout<<"             "<<PRGNAME <<" seq_reference.fa ./outdir/dat_paired_end 20 500 20"<<endl<<endl;
        cout<<"------------------------Optional Parameters------------------------"<<endl<<endl;
        cout<<"-p specify user's own read profile for simulation"<<endl<<endl;
       	cout<<"     Note: the name of a read profile is the directory containing read profile data files."<<endl;
        cout<<"           please read the REAME file about the format of 454 read profile data files and."<<endl;
        cout<<"           and the default filenames of these data files." <<endl<<endl; 

        exit(0);
    }

    bool is_pairend_read=false;
    char* seq_file= argv[i]; //"w:/vmLinux/vsProjects/art/art/release/test.fa";
    string out_file_prefix=argv[i+1];//"w:/vmLinux/vsProjects/art/art/release/test454";
    double x_fold  = atof(argv[i+2]);
    string num="";
    if (k==6){
        num="1";
        is_pairend_read=true;
        art::ini_read_pair_rand(abs(atoi(argv[i+3])),fabs(atof(argv[i+4])));
        if(art::gaussain_mean <= MINIMUM_FLAGMENT_SIZE){
            cerr<<"Error: the mean fragment length is shorter than the minimum fragment size defined ("<< MINIMUM_FLAGMENT_SIZE <<")"<<endl;
            exit(1);
        }
    }

    string alnfasta=out_file_prefix+num+".aln";
    string fqfile=out_file_prefix+num+".fq";
    string statfile=out_file_prefix+".stat";

    ofstream FQFILE(fqfile.c_str(),ios::binary);
    if(!FQFILE.is_open()) { cout<<"can not open output file: "<<fqfile<<endl; exit(0); }

    ofstream ALNFILE(alnfasta.c_str(),ios::binary);
    if(!ALNFILE.is_open()) { cout<<"can not open output file: "<<alnfasta<<endl; exit(0); }

    ofstream STATFILE(statfile.c_str(),ios::binary);
    if(!STATFILE.is_open()) { cout<<"can not open output file: "<<statfile<<endl; exit(0); }

    read_profile::set_err_prob();
    read_profile qdist; 
    if(profile_name.empty()){
	    qdist.default_profile(); 
    }
    else{
	    qdist.user_profile(profile_name+"/qual_1st_profile", profile_name+"/qual_mc_profile", profile_name+"/indel_error_profile", profile_name+"/length_dist");
    }

    //caluate CPUT time
    clock_t start, end;
    double cpu_time_used;
    start = clock();

    int read_len  = 0;
    vector<short> qual;
    readSeqFile seq_reader(seq_file);
    string id;
    art a_art; 
    seqRead a_read;
    string aln_read,aln_ref;
    ostringstream osID;
    int num_seq=0;
    string read_id;

    unsigned long total_read_count=0;

    //paired-end reads simulaiton
    if(is_pairend_read){
        string alnfasta2=out_file_prefix+"2.aln";
        string fqfile2=out_file_prefix+"2.fq";

        ofstream FQFILE2(fqfile2.c_str(),ios::binary);
        if(!FQFILE2.is_open()) { cout<<"can not open output file: "<<fqfile2<<endl; exit(0); }

        ofstream ALNFILE2(alnfasta2.c_str(),ios::binary);
        if(!ALNFILE2.is_open()) { cout<<"can not open output file: "<<alnfasta2<<endl; exit(0); }
        seqRead a_read_2;
        vector<short> qual_2;
        string read_id_2;
        string aln_read_2,aln_ref_2;
       
        while(seq_reader.next_seq(id,a_art.ref_seq)){
            num_seq++;
            long t_num_base=(long) a_art.ref_seq.size();
            vector<int> depth_coverage(t_num_base,0);
            vector<int> start_coverage(t_num_base,0); //#read starting on the same position 

            long base_count=t_num_base; 
            int the_fold=x_fold;
            a_art.init_fast();
            while(the_fold>0){
                a_read.clear();

	    	int cc_try=0; //debug info
		do {
		       	read_len=qdist.get_ran_read_len();
		       	a_art.ini_set(read_len);
		 	//debug info
			if(cc_try>=100){
			       	cerr<<" was failed"<<endl;
		       	}
		       	cc_try++;
		       	if(cc_try>=100){ cerr<<"try "<<cc_try;}

		}while(!a_art.next_pair_read(a_read, a_read_2));

		if(a_read.seq_ref.find('n',0)!=string::npos || a_read_2.seq_ref.find('n',0)!=string::npos) { 
			base_count-=read_len;
		       	continue;
	       	}

		//debug info
		if(cc_try>=100){ cerr<<" was successed"<<endl; }

                qual.clear();
                qual_2.clear();
                vector<string> aln, aln2;

		short start_cyc=0; //reset start_cyc to zero 
		bool ok=true;
                if(a_read.is_plus_strand){ 
                  qdist.get_read_qual_fast(a_art.homo_plus, a_read.bpos, a_read.seq_ref, a_read.seq_read, aln, qual, start_cyc);
                  ok=qdist.get_read_qual_fast(a_art.homo_plus, a_read_2.bpos, a_read_2.seq_ref, a_read_2.seq_read, aln2, qual_2, start_cyc);
                }
                else{
                  qdist.get_read_qual_fast(a_art.homo_minus, a_read.bpos, a_read.seq_ref, a_read.seq_read, aln, qual, start_cyc);
                  ok=qdist.get_read_qual_fast(a_art.homo_minus, a_read_2.bpos, a_read_2.seq_ref, a_read_2.seq_read, aln2, qual_2, start_cyc);
                }
		if(!ok) continue;

                //statistics
                start_coverage[a_read.bpos]+=1;
                start_coverage[a_read_2.bpos]+=1;
                for(int k=0; k<a_read.seq_ref.length(); k++){
                    depth_coverage[a_read.bpos+k]+=1;
                }
                for(int k=0; k<a_read_2.seq_ref.length(); k++){
                    depth_coverage[a_read_2.bpos+k]+=1;
                }

                //output
		total_read_count++;
                osID.str("");
                osID<<id<<'_'<<total_read_count; 
                read_id = osID.str();

                read_id_2=read_id+"-2";
                read_id+="-1";
                //print first read

                FQFILE<<"@"<<read_id<<endl<<a_read.seq_read<<endl<<"+"<<endl;
                for(size_t k=0; k<qual.size(); k++){
                    FQFILE<<(char)(qual[k]+33);
                }
                FQFILE<<endl;

                ALNFILE<<">"<<id<<"\t"<<read_id<<"\t"<<a_read.bpos;
                if(a_read.is_plus_strand) ALNFILE<<"\t+\n";
                else ALNFILE<<"\t-\n";
                ALNFILE<<aln[0]<<endl<<aln[1]<<endl;

                //print second read
                FQFILE2<<"@"<<read_id_2<<endl<<a_read_2.seq_read<<endl<<"+"<<endl;
                for(size_t k=0; k<qual_2.size(); k++){
                    FQFILE2<<(char)(qual_2[k]+33);
                }
                FQFILE2<<endl;

                ALNFILE2<<">"<<id<<"\t"<<read_id_2<<"\t"<<a_read_2.bpos;
                if(a_read_2.is_plus_strand) ALNFILE2<<"\t+\n";
                else ALNFILE2<<"\t-\n";
                ALNFILE2<<aln2[0]<<endl<<aln2[1]<<endl;

                base_count-=a_art.read_len;
                if(base_count<=0){
                    the_fold-=1;
                    base_count=t_num_base;
                }
            }
            //output stat
            for (long k=0; k<t_num_base; k++){
                STATFILE<<k<<"\t"<<start_coverage[k]<<"\t"<<depth_coverage[k]<<endl;
            }
        }
        FQFILE2.close();
        ALNFILE2.close();
    }
    else{
	    //single-end reads simulaiton
        while(seq_reader.next_seq(id,a_art.ref_seq)){
            num_seq++;
            long t_num_base=(long) a_art.ref_seq.size();
            vector<int> depth_coverage(t_num_base,0);
            vector<int> start_coverage(t_num_base,0); //#read starting on the same position 
            long base_count=t_num_base; 
            int the_fold=x_fold;
            a_art.init_fast();

            while(the_fold>0){
                a_read.clear();
                read_len=qdist.get_ran_read_len();
                a_art.ini_set(read_len);
                a_art.next_read(a_read);
		read_len=a_read.seq_ref.size();
                qual.clear();

                vector<string> aln;
		short start_cyc=0; //alway start with flow cycle zero for single-end reads

		if(a_read.seq_ref.find('n',0)!=string::npos) { 
			base_count-=read_len;
		       	continue;
	       	}
                
                if(a_read.is_plus_strand){ 
                  qdist.get_read_qual_fast(a_art.homo_plus, a_read.bpos, a_read.seq_ref, a_read.seq_read, aln, qual,start_cyc);
                }
                else{
                  qdist.get_read_qual_fast(a_art.homo_minus, a_read.bpos, a_read.seq_ref, a_read.seq_read, aln, qual,start_cyc);
                }

                //statistics
                start_coverage[a_read.bpos]+=1;
                for(int k=0; k<read_len; k++){
                    depth_coverage[a_read.bpos+k]+=1;
                }

                //output
                osID.str("");
                osID<<id<<'-'<<a_read.bpos<<'-'<<start_coverage[a_read.bpos]; 
                read_id = osID.str();

                FQFILE<<"@"<<read_id<<endl<<a_read.seq_read<<endl<<"+"<<endl;
                for(size_t k=0; k<qual.size(); k++){
                    FQFILE<<(char)(qual[k]+33);
                }
                FQFILE<<endl;
	
                ALNFILE<<">"<<id<<"\t"<<read_id<<"\t"<<a_read.bpos;
                if(a_read.is_plus_strand) ALNFILE<<"\t+\n";
                else ALNFILE<<"\t-\n";
                ALNFILE<<aln[0]<<endl<<aln[1]<<endl;

                base_count-=read_len;
                if(base_count<=0){
                    the_fold-=1;
                    base_count=t_num_base;
                }
            }
            //output stat
            for (long k=0; k<t_num_base; k++){
                STATFILE<<k<<"\t"<<start_coverage[k]<<"\t"<<depth_coverage[k]<<endl;
            }
        }
    }

    FQFILE.close();
    ALNFILE.close();
    STATFILE.close();

    end = clock();
    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
    cout<<"total CPU time used: "<<cpu_time_used<<endl;
}


