// >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
// ART_SOLiD -- Artificial Read Transcriber 
// Copyright(c) 2008-2011 Weichun Huang, All Rights Reserved.
// ________________________________________________________________________________
//
#include <iostream>
#include <sstream>
#include <string>
#include <time.h>
#include <algorithm>
#include <iomanip>
#include <ctime>
#include "readSeqFile.h"
#include "art.h"

using namespace std;
// VERSION is not referenced by the code visible in this file (the banner text
// hard-codes its version number); PRGNAME is the executable name inserted
// into the usage text printed by main().
#define VERSION "0.9.1"
#define PRGNAME "art_SOLiD"
// Out-of-class definition of the static SOLiDread::prob_err table, one slot
// per quality value below max_qual_value.
// NOTE(review): presumably filled by SOLiDread::set_err_prob(), which main()
// calls before constructing any SOLiDread -- confirm in the class header.
double SOLiDread::prob_err[max_qual_value];
// Disabled legacy declarations, kept for reference only.
//bool parse_arg(int num, char* arg);
//vector<double> SOLiDread::ins_rate;
//vector<double> SOLiDread::del_rate;
//vector<double> SOLiDread::sub_rate;
// Out-of-class definitions of the static members of art.
// gsl_R is a GSL random number generator handle. main() reads gaussain_mean
// right after calling art::ini_read_pair_rand(mean, std_dev).
// NOTE(review): presumably ini_read_pair_rand() sets both gaussain_* values
// (the fragment-length distribution for paired-end reads) -- confirm in art.h.
gsl_rng* art::gsl_R;
int art::gaussain_mean;
double art::gaussain_sigma;

int main(int argc, char* argv[]){

    bool is_pairend_read=false;

    string profile_name = "";
    double error_scale_factor=1;
    int i=1;
    for(;i<argc;++i){
	    char* pch = argv[i];
	    if( *pch != '-' || *(pch+1) == '\0') break;
	    while(*++pch){
		    switch(*pch){
			    case 'v':
			    case 'V':
				    cout << "================================================================="<<endl;
				    cout << "                    ART (SOLiD version 0.9.1)                    "<<endl; 
				    cout << "   Copyright (c) 2008-2011, Weichun Huang. All Rights Reserved.  "<<endl; 
				    cout << "================================================================="<<endl<<endl;
				    exit(1);
				    break;
			    case 'p':
			    case 'P':
				    if(i<argc) profile_name=argv[++i]; 
				    break;
			    case 's':
			    case 'S': 
				    if(i<argc) error_scale_factor=atof(argv[++i]);
				    break;

			    default:
				   cerr<<"Error: unreconized option \""<<*pch<< "\""<<endl;
				   break;
		    }
	    }
    }

    int k=argc-i+1;
    if(k !=5 &&  k !=7){
	cout << "================================================================="<<endl;
	cout << "                    ART (SOLiD version 0.9.1)                    "<<endl; 
	cout << "   Copyright (c) 2008-2011, Weichun Huang. All Rights Reserved.  "<<endl; 
	cout << "================================================================="<<endl<<endl;
        cout<<"USAGE FOR SINGLE-END READ SIMULATION"<<endl<<endl;
        cout<<"     "<< PRGNAME <<" [ options ] <INPUT_SEQ_FILE> <OUTPUT_FILE_PREFIX> <LEN_READ> <FOLD_COVERAGE>"<<endl<<endl;
       	cout<<"     Example:"<<endl;
       	cout<<"             "<<PRGNAME <<" seq_reference.fa ./outdir/dat_single_end 25 10"<<endl<<endl;
        cout<<"USAGE FOR PAIRED-END READ SIMULATION"<<endl<<endl;
        cout<<"     "<< PRGNAME <<" [ options ] <INPUT_SEQ_FILE> <OUTPUT_FILE_PREFIX> <LEN_READ> <FOLD_COVERAGE> <MEAN_FRAG_LEN> <STD_DEV>"<<endl<<endl;
        cout<<"     Example:"<<endl;
       	cout<<"             "<<PRGNAME <<" seq_reference.fa ./outdir/dat_paired_end 32 10  500 20"<<endl<<endl;
        cout<<"------------------------Optional Parameters------------------------"<<endl<<endl;
        cout<<"-v print out version information"<<endl;
        cout<<"-p specify user's own read profile for simulation"<<endl;
        cout<<"-s specify the scale factor adjusting error rate (e.g., 0 - zero-error rate simulation)"<<endl<<endl;

        exit(0);
    }

    bool mask_n=true; 
    short max_num_n=5; 
    int len_ref_id=250;

    //caluate CPUT time
     clock_t start, end;
     double cpu_time_used;
     start = clock();

    char* seq_file= argv[i];
    string out_file_prefix=argv[i+1];
    int read_len  = atoi(argv[i+2]);
    double x_fold  = atof(argv[i+3]);
    string num="";

    if (k==7){
        num="1";
        num="_R3";
        is_pairend_read=true; art::ini_read_pair_rand(abs(atoi(argv[i+4])),fabs(atof(argv[i+5])));
        if(art::gaussain_mean<=read_len){
            cerr<<"Error: the read length must be shorter than the mean flagment length specified"<<endl;
            exit(1);
        }
    }

    string seqfasta=out_file_prefix+num+".fa";
    string qualfasta=out_file_prefix+num+".qual";
    string alnfasta=out_file_prefix+num+".map";
    string fqfile=out_file_prefix+num+".fq";

    ofstream FQFILE(fqfile.c_str(),ios::binary);
    if(!FQFILE.is_open()) { cout<<"can not open output file: "<<fqfile<<endl; exit(0); }

    ofstream ALNFILE(alnfasta.c_str(),ios::binary);
    if(!ALNFILE.is_open()) { cout<<"can not open output file: "<<alnfasta<<endl; exit(0); }

    SOLiDread::set_err_prob();

    vector<short> qual;
    readSeqFile seq_reader(seq_file);
    string id;
    art a_art; 
    SOLiDread a_read(profile_name);
    a_read.ini_ran_qual();

    if(read_len>a_read.error_profile.size()){
        cerr<<"Error: the read length "<<read_len<<" exceeds the max length "<<a_read.error_profile.size()<<endl;
//        cerr<<"Error: the read length "<<read_len<<" exceeds the max length "<<qdist.qual_dist_first.size()<<endl<<endl;
        exit(1);
    }

    for(size_t i=0; i<a_read.cal_err_rate_1st.size(); i++){
      a_read.cal_err_rate_1st[i]*=error_scale_factor;
    }
    for(size_t i=0; i<a_read.cal_err_rate_2nd.size(); i++){
      a_read.cal_err_rate_2nd[i]*=error_scale_factor;
    }

/*
    a_read.set_rate(read_len,0.0001,2,a_read.ins_rate);
    a_read.set_rate(read_len,0.0001,2,a_read.del_rate);
    a_read.set_rate(read_len,0.028,2,a_read.sub_rate);
*/
    string aln_read,aln_ref;
    ostringstream osID;
    int num_seq=0;
    string read_id;

    unsigned long cc_num_read=1;
    if(is_pairend_read){
        string seqfasta2=out_file_prefix+"_F3.fa";
        string qualfasta2=out_file_prefix+"_F3.qual";
        string alnfasta2=out_file_prefix+"_F3.map";
        string fqfile2=out_file_prefix+"_F3.fq";
//        ofstream SEQFILE2(seqfasta2.c_str(),ios::binary);
//        if(!SEQFILE2.is_open()) { cout<<"can not open output file: "<<seqfasta2<<endl; exit(0); }

//        ofstream QUALFILE2(qualfasta2.c_str(),ios::binary);
//        if(!QUALFILE2.is_open()) { cout<<"can not open output file: "<<qualfasta2<<endl; exit(0); }

        ofstream FQFILE2(fqfile2.c_str(),ios::binary);
        if(!FQFILE2.is_open()) { cout<<"can not open output file: "<<fqfile2<<endl; exit(0); }

        ofstream ALNFILE2(alnfasta2.c_str(),ios::binary);
        if(!ALNFILE2.is_open()) { cout<<"can not open output file: "<<alnfasta2<<endl; exit(0); }
        SOLiDread a_read_2(profile_name);
        a_read_2.ini_ran_qual();

        for(size_t i=0; i<a_read_2.cal_err_rate_1st.size(); i++){
          a_read_2.cal_err_rate_1st[i]*=error_scale_factor;
        }
        for(size_t i=0; i<a_read_2.cal_err_rate_2nd.size(); i++){
          a_read_2.cal_err_rate_2nd[i]*=error_scale_factor;
        }
/*
   	a_read_2.set_rate(read_len,0.0001,2,a_read.ins_rate);
        a_read_2.set_rate(read_len,0.0001,2,a_read.del_rate);
        a_read_2.set_rate(read_len,0.036,2,a_read.sub_rate);
*/
        vector<short> qual_2;
        string read_id_2;
        string aln_read_2,aln_ref_2;
        while(seq_reader.next_seq(id,a_art.ref_seq)){ 
//            size_t p1=id.find_first_of(' '); if(p1==string::npos) p1=10; size_t p2=id.find_first_of('\t'); if(p2==string::npos) p2=10;            p1=p1<p2?p1:p2; id=id.substr(0,p1); 
            istringstream isID; isID.str(id); isID>>id; id=id.substr(0,len_ref_id); 
            num_seq++;
            a_art.ini_set(read_len);
            if(mask_n){ 
              a_art.mask_n_region(max_num_n);
            }
            long t_num_read=(long) a_art.ref_seq.size()/read_len*x_fold;
            while(t_num_read>0){
//generate SOLiD-like id
		int num_3rd=cc_num_read / 1000000 + 1;
		unsigned int num_3rd_rem=cc_num_read % 1000000;
		int num_1st=num_3rd_rem % 1000;
		int num_2nd=num_3rd_rem / 1000 + 1;
                osID<<num_3rd<<'_'<<num_2nd<<'_'<<num_1st;
                read_id = osID.str();
                osID.str("");

                a_read.clear();
                a_read_2.clear();
                //a_art.next_pair_read_indel(a_read, a_read_2); //need SOLiD profile with indel error rate 
                a_art.next_pair_read(a_read, a_read_2); 
                if(mask_n){ 
                  if(a_read.is_plus_strand){
                    if(a_art.masked_pos.count(a_read.bpos)>0 || a_art.masked_pos.count(a_read_2.bpos)>0){
                      t_num_read-=2;
                      continue;
                    }
                  }
                  else{
                    size_t bpos1=a_art.ref_seq.size()-a_read.bpos-read_len;
                    size_t bpos2=a_art.ref_seq.size()-a_read_2.bpos-read_len;
                    if(a_art.masked_pos.count(bpos1)>0 || a_art.masked_pos.count(bpos2)>0){
                      t_num_read-=2;
                      continue;
                    }
                  }
                }

		string cs_seq_1st, cs_seq_2nd;
		map<int,char> error_pos_1st, error_pos_2nd;
		map<int,char>::iterator it; 
		//convert base-space to color space, and incorporate sequencing errors
                //qual.clear();
                //qual_2.clear();
	    	a_read.convert_seq2cs(cs_seq_1st, qual, error_pos_1st, false) ; //the 1st is R3 
	    	a_read_2.convert_seq2cs(cs_seq_2nd, qual_2, error_pos_2nd) ;  //the 2nd is F3 
//                read_id_2=read_id+"-2";
//                read_id+="-1";
                read_id_2=read_id+"_F3";
                read_id+="_R3";
//print first read
//                SEQFILE<<">"<<read_id<<endl<<a_read.seq_read<<endl; //<<a_read.seq_ref<<endl;
//                QUALFILE<<">"<<read_id<<endl;
//                copy(qual.begin(),qual.end(), ostream_iterator<short>(QUALFILE,"\t"));
//                QUALFILE<<endl;

                FQFILE<<"@"<<read_id<<endl<<'G'<<cs_seq_1st<<endl<<"+"<<endl;
                for(size_t k=0; k<qual.size(); k++){
                    FQFILE<<(char)(qual[k]+33);
                }
                FQFILE<<endl;

                ALNFILE<<id<<"\t"<<read_id<<"\t"<<a_read.bpos;
                if(a_read.is_plus_strand) ALNFILE<<"\t+";
                else ALNFILE<<"\t-";
		ALNFILE<<"\t"<<error_pos_1st.size();
		for(it=error_pos_1st.begin(); it!=error_pos_1st.end(); it++){
			ALNFILE<<"\t"<<it->first<<"\t"<<it->second<<cs_seq_1st[it->first];
		}
		ALNFILE<<endl;

		/*
                if(a_read.get_aln(aln_read,aln_ref)){
                    ALNFILE<<aln_ref<<endl<<aln_read<<endl;
                }
                else{
                    ALNFILE<<a_read.seq_ref<<endl<<a_read.seq_read<<endl;
                }
		*/
//print second read
//                SEQFILE2<<">"<<read_id_2<<endl<<a_read_2.seq_read<<endl; //<<a_read.seq_ref<<endl;
//                QUALFILE2<<">"<<read_id_2<<endl;
//                copy(qual_2.begin(),qual_2.end(), ostream_iterator<short>(QUALFILE2,"\t"));
//                QUALFILE2<<endl;

                FQFILE2<<"@"<<read_id_2<<endl<<'T'<<cs_seq_2nd<<endl<<"+"<<endl;
                for(size_t k=0; k<qual_2.size(); k++){
                    FQFILE2<<(char)(qual_2[k]+33);
                }
                FQFILE2<<endl;

                ALNFILE2<<id<<"\t"<<read_id_2<<"\t"<<a_read_2.bpos;
                if(a_read_2.is_plus_strand) ALNFILE2<<"\t+";
                else ALNFILE2<<"\t-";
		ALNFILE2<<"\t"<<error_pos_2nd.size();
		for(it=error_pos_2nd.begin(); it!=error_pos_2nd.end(); it++){
			ALNFILE2<<"\t"<<it->first<<"\t"<<it->second<<cs_seq_2nd[it->first];
		}
		ALNFILE2<<endl;
/*
                if(a_read_2.get_aln(aln_read_2,aln_ref_2)){
                    ALNFILE2<<aln_ref_2<<endl<<aln_read_2<<endl;
                }
                else{
                    ALNFILE2<<a_read_2.seq_ref<<endl<<a_read_2.seq_read<<endl;
                }
*/
                t_num_read-=2;
                cc_num_read+=1;
            }
        }
//        SEQFILE2.close();
//        QUALFILE2.close();
        FQFILE2.close();
        ALNFILE2.close();
    }
    else{
        while(seq_reader.next_seq(id,a_art.ref_seq)){
            istringstream isID; isID.str(id); isID>>id; id=id.substr(0,len_ref_id); 
            num_seq++;
            a_art.ini_set(read_len);
            long t_num_read=(long) a_art.ref_seq.size()/read_len*x_fold;
            while(t_num_read>0){
//              osID<<num_seq<<fixed<<setfill('0')<< setw(10)<< t_num_read;
/*
                osID<<id<<'-'<<t_num_read;
                read_id = osID.str();
                osID.str("");
*/

//generate SOLiD-like id
		int num_3rd=cc_num_read / 1000000 + 1;
		unsigned int num_3rd_rem=cc_num_read % 1000000;
		int num_1st=num_3rd_rem % 1000;
		int num_2nd=num_3rd_rem / 1000 + 1;

                osID<<num_3rd<<'_'<<num_2nd<<'_'<<num_1st;
                read_id = osID.str();
                osID.str("");
		read_id +="_F3";

                a_read.clear();
                //a_art.next_read(a_read);
                a_art.next_read(a_read);
                if(mask_n){ 
                  if(a_read.is_plus_strand){
                    if(a_art.masked_pos.count(a_read.bpos)>0){
                      t_num_read-=1;
                      continue;
                    }
                  }
                  else{
                    size_t bpos=a_art.ref_seq.size()-a_read.bpos-read_len;
                    if(a_art.masked_pos.count(bpos)>0){
                      t_num_read-=1;
                      continue;
                    }
                  }
                }

 		string cs_seq_1st;
		map<int,char> error_pos_1st;
		map<int,char>::iterator it; 
                //qua.clear();
	    	a_read.convert_seq2cs(cs_seq_1st, qual, error_pos_1st) ;
                FQFILE<<"@"<<read_id<<endl<<'T'<<cs_seq_1st<<endl<<"+"<<endl;
                for(size_t k=0; k<qual.size(); k++){
                    FQFILE<<(char)(qual[k]+33);
                }
                FQFILE<<endl;

                ALNFILE<<id<<"\t"<<read_id<<"\t"<<a_read.bpos;
                if(a_read.is_plus_strand) ALNFILE<<"\t+";
                else ALNFILE<<"\t-";
		ALNFILE<<"\t"<<error_pos_1st.size();
		for(it=error_pos_1st.begin(); it!=error_pos_1st.end(); it++){
			ALNFILE<<"\t"<<it->first<<"\t"<<it->second<<cs_seq_1st[it->first];
		}
		ALNFILE<<endl;

                t_num_read--;
                cc_num_read+=1;
            }
        }
    }

//    SEQFILE.close();
//    QUALFILE.close();
    FQFILE.close();
    ALNFILE.close();
    end = clock();
    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
    cout<<"total CPUT time used: "<<cpu_time_used<<endl;
    //      ofstream outGlobal(outFile.c_str());
    //      if(!outGlobal.is_open()) { cout<<"can not open output file: "<<outFile<<endl; exit(0); }
    //      ostream_iterator <char, char, char_traits <char> > os(outGlobal,"");		
}

//
//bool parse_arg(int num, char* arg){
//    bool success=true;
//    int i=1;
//    for(;i<ARGC;++i){
//       	char* pch = ARGV[i];
//       	if( *pch != '-' || *(pch+1) == '\0') break;
//       	while(*++pch && success){
//            switch(*pch){
//                // Version Information
//            case 'v':
//            case 'V':
//                prtVersion = true;
//                break;
//                // Help Information
//            case 'h':
//            case 'H':
//                prtVersion = true;
//                prtUsage = true;
//                break;
//                // Unbuffered Output
//            case 't':
//            case 'T':
//                html_out=false;
//                break;
//            case 'b':
//            case 'B':
//                if(i<ARGC) browser=ARGV[++i]; 
//                else success=PrintErr("Invalid value for option: \"%c\"", *pch);
//                break;
//            case 'o':
//            case 'O':
//                if(i<ARGC) outFile=ARGV[++i]; 
//                else success=PrintErr("Invalid value for option: \"%c\"", *pch);
//                break;
//            case 'c':
//            case 'C':
//                if(i<ARGC) cfgFile=ARGV[++i]; 
//                else success=PrintErr("Invalid value for option: \"%c\"", *pch);
//                break;
//                // Error Reporting
//            default:
//                success=PrintErr("Unreconized switch, \"%c\"", *pch);
//                prtUsage = true;
//                prtVersion = true;
//                break;
//            }
//       	}
//    }
//
//    // Print Version and/or Usage Information and exit
//    if(prtVersion || prtUsage){
//       	if(prtUsage) printUsage();
//       	if(prtVersion) printVer();
//       	return false;
//    }
//
//    if(i>ARGC){ printUsage(); return false;}
//
//    aceFile=ARGV[i];
//    return success;;
//}

