#!/usr/bin/perl
#This tool converts native-format 454 paired-end read data generated by ART_454 to the standard paired-end read format.
#Author: Weichun Huang at whduke@gmail.com
use File::Basename;
use File::Copy;
# Exactly two command-line arguments are required (input directory and
# output directory).  With any other count, print the usage banner and the
# explanatory notes to STDERR and exit with status 1.
unless (scalar(@ARGV) == 2) {
	my $banner = "\nThis tool converts the Native format of 454 paired-end reads to the standard format\n\n";
	my $usage  = "USAGE:\n\t$0 input_dir output_dir\n";
	print STDERR $banner, $usage;
	# Interpolating heredoc: "\@" below yields a literal "@".
	print STDERR <<"DESCRIPTION";

NOTES:
	The read directions are different between the Native and Standard formats of
	454 paired-end reads.  ART_454 simulator generates 454 paired-end reads in
	the Native format used by 454 SFF files.  However, some tools, e.g., velvet,
	can use only the standard paired-end format.  art_454_native2std can be used 
	to convert ART data files (*.sam and *.fq files) in the native format to the 
	Standard format of paired-end reads.  

 	The NATIVE format 
	The native format of 454 paired-end reads, i.e., those directly
	from 454 SFF files, are always from the same DNA strand as in the following
	two formats:

	1) ----------->..................................--------------->
	2) <-----------...................................<--------------

	The STANDARD format

	<------------..................................--------------->

CONTACT:  whduke\@gmail.com

DESCRIPTION
	exit 1;
}
# SAM FLAG values recognised in the native files and the values that replace
# them.  Individual bits, per the SAM specification:
#   0x01 paired, 0x02 properly paired, 0x10 read on reverse strand,
#   0x20 mate on reverse strand, 0x40 first in pair, 0x80 second in pair.
my $pair_base = 0x01 | 0x02;
my ($first_in_pair, $second_in_pair) = (0x40, 0x80);
my ($self_reverse, $mate_reverse)    = (0x10, 0x20);

# Native pair with neither strand bit set: afterwards the second read
# carries the reverse bit and the first read records that its mate does.
my $p1   = $pair_base | $first_in_pair;
my $p2   = $pair_base | $second_in_pair;
my $n_p1 = $p1 | $mate_reverse;
my $n_p2 = $p2 | $self_reverse;

# Native pair with both strand bits set on both reads: afterwards the first
# read keeps only its own reverse bit and the second keeps only the
# mate-reverse bit.
my $n1   = $p1 | $self_reverse | $mate_reverse;
my $n2   = $p2 | $self_reverse | $mate_reverse;
my $n_n1 = $p1 | $self_reverse;
my $n_n2 = $p2 | $mate_reverse;

# Main driver: walk the input directory and write the Standard-format
# counterpart of each recognised file into the output directory.
#   *.sam                    FLAG column rewritten (see the flag table above)
#   *2.fq                    second reads reverse-complemented, with the
#                            quality string reversed to stay aligned
#   *.fq not ending in a     copied unchanged
#   digit before ".fq"
# Every other directory entry is ignored.
my ($dir, $odir)=@ARGV;
if ($dir eq $odir){
	die "input and output directory cannot be the same\n";
}
if (! -d $odir){
	mkdir($odir) or die "cannot create dir $odir\n";
}
opendir(my $dh, $dir) or die "cannot open dir $dir: $!\n";

# The string comparison above misses spellings such as "in" vs "in/" or
# "./in".  Compare the resolved paths too: writing into the input directory
# would truncate the very files that are being read.
require Cwd;
my $real_in  = Cwd::abs_path($dir);
my $real_out = Cwd::abs_path($odir);
if (defined $real_in and defined $real_out and $real_in eq $real_out){
	die "input and output directory cannot be the same\n";
}

# Native FLAG value => Standard FLAG value (keys are the decimal numbers).
my %std_flag_for = (
	$p1 => $n_p1,
	$p2 => $n_p2,
	$n1 => $n_n1,
	$n2 => $n_n2,
);

# defined() so that an entry literally named "0" does not end the loop.
while (defined(my $file = readdir($dh))) {
	my $src = "$dir/$file";
	my $dst = "$odir/$file";
	next unless -f $src;	# skip ".", "..", sub-directories, etc.

	if ($file =~ /\.sam\z/){
		print "processing file $file\n";
		_convert_sam_file($src, $dst, \%std_flag_for);
	}
	elsif ($file =~ /2\.fq\z/){
		print "processing file $file\n";
		_convert_read2_fastq($src, $dst);
	}
	# NOTE(review): as in the original pattern, a *.fq file whose name ends
	# in a digit other than 2 (e.g. the first-read *1.fq) is NOT copied.
	# Confirm whether read-1 files should be carried over as well.
	elsif ($file =~ /\D\.fq\z/){
		copy($src, $dst) or die "Copy failed: $!";
	}
}
closedir($dh);

# _convert_sam_file($in_path, $out_path, \%flag_map)
# Copy a SAM file, replacing the FLAG (2nd column) of every alignment line
# whose numeric value is a key of %flag_map with the mapped value.  Header
# lines (starting with "@") are copied verbatim, lines without any word
# character are dropped, and trailing whitespace is stripped from alignment
# lines.  Dies if either file cannot be opened or the output cannot be
# flushed.
sub _convert_sam_file {
	my ($in_path, $out_path, $flag_map) = @_;
	open(my $in,  '<', $in_path)  or die "cannot read $in_path: $!\n";
	open(my $out, '>', $out_path) or die "cannot write $out_path: $!\n";
	while (defined(my $line = <$in>)) {
		if ($line =~ /^\@/) {
			print {$out} $line;
			next;
		}
		next if $line !~ /\w/;
		$line =~ s/\s+$//;
		my @fl = split /\t/, $line;
		# Only touch a FLAG that is present and numeric; "+ 0" normalises
		# forms such as "067" to the key used in the map.
		if (defined $fl[1] and $fl[1] =~ /^\d+$/ and exists $flag_map->{ $fl[1] + 0 }) {
			$fl[1] = $flag_map->{ $fl[1] + 0 };
		}
		print {$out} join("\t", @fl), "\n";
	}
	close($in);
	# Buffered write errors (e.g. disk full) only surface at close.
	close($out) or die "cannot finish writing $out_path: $!\n";
	return;
}

# _convert_read2_fastq($in_path, $out_path)
# Copy a FASTQ file of second reads, reverse-complementing every sequence and
# reversing its quality string so that each base keeps its own quality value.
# Records are read as four consecutive lines (header, sequence, "+" line,
# qualities); this converts reads whatever their name is and never mistakes a
# quality line that happens to begin with "@" for a header.  A line found
# where a header is expected but not starting with "@" is copied unchanged.
# Dies on a truncated record, a missing "+" line, or an I/O failure.
sub _convert_read2_fastq {
	my ($in_path, $out_path) = @_;
	open(my $in,  '<', $in_path)  or die "cannot read $in_path: $!\n";
	open(my $out, '>', $out_path) or die "cannot write $out_path: $!\n";
	while (defined(my $header = <$in>)) {
		if ($header !~ /^\@/) {
			print {$out} $header;
			next;
		}
		my $seq  = <$in>;
		my $sep  = <$in>;
		my $qual = <$in>;
		if (!defined $qual) {
			die "truncated FASTQ record in $in_path at: $header";
		}
		if ($sep !~ /^\+/) {
			die "malformed FASTQ record (no '+' line) in $in_path at: $header";
		}
		s/\s+$// for ($seq, $qual);
		print {$out} $header, dna_RC($seq), "\n", $sep, scalar reverse($qual), "\n";
	}
	close($in);
	close($out) or die "cannot finish writing $out_path: $!\n";
	return;
}

# dna_RC($dna)
# Return the reverse complement of a nucleotide string.
#   $dna - sequence text; upper and lower case are both handled and case is
#          preserved.  An undefined value is treated as the empty string.
# Besides A/C/G/T, the IUPAC ambiguity codes are complemented (R<->Y, K<->M,
# B<->V, D<->H; S, W and N are their own complements).  Any other character
# is reversed in position but otherwise left unchanged.
sub dna_RC{
	my ($dna) = @_;
	return '' unless defined $dna;
	# "scalar" forces string reversal rather than list reversal.
	my $revcomp = scalar reverse($dna);
	$revcomp =~ tr/ACGTRYSWKMBDHVNacgtryswkmbdhvn/TGCAYRSWMKVHDBNtgcayrswmkvhdbn/;
	return $revcomp;
}

