#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "defines.h"

/* Filtered string copy: copies src into dst, dropping every occurrence of
   the character `exclude`. The terminating NUL is copied like any other
   character, so dst ends up NUL-terminated unless `exclude` is itself '\0'.
   dst must be large enough to hold the filtered result. */
void strcpyf(char *dst, const char *src, char exclude) {
	char c;

	do {
		c = *src++;
		if (c != exclude)
			*dst++ = c;
	} while (c != '\0');	/* stop once the terminator has been processed */
}

/* Forward declarations of allocation helpers; their definitions are not in
   this part of the file.
   NOTE(review): neither function is called by the code visible here, and the
   meaning of the int parameters is not shown -- confirm against the
   definitions (or remove the prototypes if they are unused). */
char *alloc_char(int );
char **alloc_char_char(int ,int );

/* Creates an empty FASTA_file: no sequences stored yet, with a zero-filled
   entry table sized for FASTA_SQ_CN_GUESS sequences. Allocation failures
   are caught by assert(). */
FASTA_file *alloc_fasta(void) {
	FASTA_file *fasta = malloc(sizeof *fasta);
	assert(fasta != NULL);

	fasta->sequence_count = 0;
	fasta->max_sequence_count = FASTA_SQ_CN_GUESS;

	/* calloc so that every unused entry starts out zeroed. */
	fasta->seqs = calloc(fasta->max_sequence_count, sizeof(FASTA_entry));
	assert(fasta->seqs != NULL);

	return fasta;
}

/* Write a fasta file to the filename given.

   Each entry is written as a ">name" header line followed by the sequence
   wrapped at FASTA_LINE characters per line. A newline is appended to the
   name only if the stored name does not already end with one.

   Returns 0 on success, -1 if the file cannot be opened or if any write
   (including the final flush performed by fclose) fails. The file is always
   closed before returning. */
int write_fasta(FASTA_file *f, const char *filename) {

	FILE *out = fopen(filename, "w");
	if (NULL == out) return -1;

	int status = 0;
	const size_t count = (size_t)f->sequence_count;
	const size_t width = (size_t)FASTA_LINE;

	// Write out each of the sequences in the file now...
	for (size_t i = 0; status == 0 && i < count; ++i) {
		const char *name = f->seqs[i].name;
		size_t name_len = strlen(name);
		// Guard the empty name: name[name_len-1] would index before the buffer.
		int has_newline = (name_len > 0 && name[name_len-1] == '\n');

		if (fprintf(out, ">%s%s", name, has_newline ? "" : "\n") < 0) {
			status = -1;
			break;
		}

		// Keep it to the configured line width.
		size_t total = (size_t)f->seqs[i].seq_len;
		for (size_t j = 0; j < total; j += width) {
			size_t chunk = total - j;
			if (chunk > width) chunk = width;

			if (fwrite(f->seqs[i].sequence + j, 1, chunk, out) != chunk
			    || fputc('\n', out) == EOF) {
				status = -1;
				break;
			}
		}
	}

	// fclose flushes buffered output, so its result is part of the outcome.
	if (fclose(out) != 0) status = -1;

	return status;
}


/* Adds data to sequence i, increasing the memory allocation if needed.
   The data to be added is a NUL-terminated string; newline characters in it
   are dropped, and NUL-termination is maintained in the resulting
   concatenated string.

   The stored length grows by exactly the number of characters kept, so
   lines with no trailing newline (e.g. the last line of a file, or a chunk
   of an over-long line) and empty strings are handled correctly. */
void fasta_add_data(FASTA_file *f, size_t i, char *data) {
	FASTA_entry *cr = f->seqs+i;

	/* Count what will actually be stored (everything except newlines). */
	size_t kept = 0;
	for (const char *p = data; *p != '\0'; ++p) {
		if (*p != '\n') ++kept;
	}

	/* Room for the existing data, the new data and the terminator. */
	size_t needed = (size_t)cr->seq_len + kept + 1;
	if (needed > (size_t)cr->max_seq_len) {
		/* Double until it fits; a zero capacity (fresh or trimmed entry)
		   jumps straight to the required size. */
		size_t cap = (size_t)cr->max_seq_len;
		if (cap == 0) cap = needed;
		while (cap < needed) {
			if (cap > (size_t)-1 / 2) {	/* doubling would overflow */
				cap = needed;
				break;
			}
			cap *= 2;
		}

		/* Go through a temporary so the old buffer is not lost on failure. */
		void *grown = realloc(cr->sequence, cap);
		assert(NULL != grown);
		cr->sequence = grown;
		cr->max_seq_len = cap;
	}
	strcpyf(cr->sequence+cr->seq_len, data, '\n');

	cr->seq_len += kept;
}

/* Sets the name of sequence i to a copy of `name`, truncated to at most
   FASTA_NAME_MAXLEN-1 characters. The entry's name buffer must hold
   FASTA_NAME_MAXLEN bytes (fasta_add_seq allocates it that way). */
void fasta_set_name(FASTA_file *f, size_t i, char *name) {
	strncpy(f->seqs[i].name, name, FASTA_NAME_MAXLEN-1);
	/* strncpy leaves the destination unterminated when the source is
	   FASTA_NAME_MAXLEN-1 characters or longer, so terminate explicitly. */
	f->seqs[i].name[FASTA_NAME_MAXLEN-1] = '\0';
}

/* Do this when you finish reading a sequence; it lets the system release
   any excess memory that was allocated for the entry. This might not help much
   since it's likely to be pretty fragmented anyway (for small sequences), but
   for large sequences it could potentially save many kB of memory, which would
   be significant for large numbers of large sequences...

   The buffer is trimmed to seq_len + 1 bytes, keeping the NUL terminator.
   This also means an empty sequence never requests a zero-byte realloc,
   whose behaviour is implementation-defined.

   Despite the finality of the name, you can still add data after calling this
   function, but that would be rather inefficient. */
void fasta_finish(FASTA_file *f, size_t i) {
	FASTA_entry *cr = f->seqs+i;
	size_t needed = (size_t)cr->seq_len + 1;

	/* Go through a temporary so the old buffer is not lost on failure. */
	void *trimmed = realloc(cr->sequence, needed);
	assert(trimmed != NULL);
	cr->sequence = trimmed;

	/* Guarantee termination even if no data was ever added. */
	cr->sequence[cr->seq_len] = '\0';
	cr->max_seq_len = needed;
}


/* Appends a new, empty sequence entry to the file and returns its index.

   The entry table is grown when it is about to fill up: it doubles while it
   has fewer than 8192 slots and grows by 1024 slots after that. Newly added
   slots are zeroed, matching what alloc_fasta sets up with calloc.

   The new entry gets a sequence buffer of FASTA_LEN_GUESS bytes and a name
   buffer of FASTA_NAME_MAXLEN bytes; the name starts out as the empty
   string. Allocation failures are caught by assert(). */
size_t fasta_add_seq(FASTA_file *f) {
	size_t count = (size_t)f->sequence_count;
	size_t old_cap = (size_t)f->max_sequence_count;

	/* Expand the number of sequences in the file if need be. */
	if (count + 1 >= old_cap) {
		size_t new_cap = (old_cap < 8192) ? old_cap * 2 : old_cap + 1024;
		/* Covers a zero starting capacity, where doubling gains nothing. */
		if (new_cap < count + 2) new_cap = count + 2;

		/* realloc takes a size in bytes, not a number of entries. */
		void *grown = realloc(f->seqs, new_cap * sizeof *f->seqs);
		assert(NULL != grown);
		f->seqs = grown;

		memset(f->seqs + old_cap, 0, (new_cap - old_cap) * sizeof *f->seqs);
		f->max_sequence_count = new_cap;
	}

	/* Initialize the new entry. */
	FASTA_entry *entry = f->seqs + count;

	entry->seq_len = 0;
	entry->max_seq_len = FASTA_LEN_GUESS;
	entry->sequence = malloc(FASTA_LEN_GUESS);
	assert(NULL != entry->sequence);

	entry->name = malloc(FASTA_NAME_MAXLEN);
	assert(NULL != entry->name);
	/* Keep the name a valid string until fasta_set_name is called. */
	entry->name[0] = '\0';

	return f->sequence_count++;
}


FASTA_file *open_fasta(const char *filename) {
	char buffer[FASTA_LINE_MAX];
	size_t cseq = 0;

	FILE *file = fopen(filename, "r");
	assert(NULL != file);

	FASTA_file *f = alloc_fasta();
	
	while (NULL != fgets(buffer, FASTA_LINE_MAX, file)) {
		if (buffer[0] == ';') // Comment.
			continue;
		else if (buffer[0] == '>') { // New sequence
			fasta_finish(f, cseq);
			cseq = fasta_add_seq(f);
			fasta_set_name(f, cseq, buffer+1);
		} else { // Continue reading in existing sequence.
			fasta_add_data(f, cseq, buffer);
		}
	}
	
	return f;
}



