#include <vector>
#include <cstdlib>
#include <map>
#include <string>
#include <sstream>
#include <iostream>
#include <algorithm>
#include <fstream>
#include <cmath>
#include <set>
#include <unistd.h>
#include "MersenneTwister.h"

using namespace std;

// Debug switch. Not referenced in the visible part of this file; presumably
// consumed by the headers included below -- TODO confirm.
#define DEBUG 0
// Function-like max/min macros. Each argument may be evaluated twice, so do
// not pass expressions with side effects.
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
// Large sentinel value. Not referenced in the visible part of this file;
// presumably used by dp.h -- TODO confirm.
#define INF 1e20

// Upper bound on misses / (2*matches + misses) for a match to be accepted
// by main() without relaxing current_sd.
const double error_rate = 0.1;
// Starting value of current_sd for sequences with more than 5 pieces.
const double SDMin = 4;
// Starting value of current_sd for sequences with at most 5 pieces, and the
// ceiling at which main() stops relaxing current_sd.
const double SDMax = 12;
// Standard-deviation threshold currently in effect; set and incremented by
// main() and logged with every match attempt.
double current_sd;

#include "parseargs.h"
#include "dp.h"

template<class data_t>
void permute(vector<data_t>& data) {

  MTRand generator;

  for(int i = data.size()-2; i >= 0; i--) 
    swap(data[i], data[i+generator.randInt(data.size()-i-1)]);
}

//
// Ordering predicate for (id, sites) pairs: `a` goes before `b` when `a`
// holds strictly more sites, so sorting with it yields descending site count.
//
bool sequence_sites_lt(const std::pair<std::string, std::vector<int> >& a, const std::pair<std::string, std::vector<int> >& b) {

  const std::vector<int>& sites_a = a.second;
  const std::vector<int>& sites_b = b.second;

  return sites_b.size() < sites_a.size();
}

//
// Returns a copy of `original` in which, for every k, the element at index
// positions[k] has been overwritten with replacements[k]. `original` itself
// is left untouched.
//
// No bounds checking is performed: every entry of `positions` must be a valid
// index into `original`, and `replacements` must be at least as long as
// `positions`.
//
std::vector<int> replace(const std::vector<int>& original, const std::vector<int>& positions, const std::vector<int>& replacements) {

  std::vector<int> patched(original);

  std::vector<int>::const_iterator value = replacements.begin();
  for(std::vector<int>::const_iterator slot = positions.begin(); slot != positions.end(); ++slot, ++value)
    patched[*slot] = *value;

  return patched;
}

//
// Returns true when `id` is on the hard-coded list of sequences that main()
// places even though their best match failed the significance / error-rate
// filters.
//
static bool is_forced_placement(const string& id) {

  static const char* const forced_ids[] = {
    "scf1119720624916", "scf1119720624029", "scf1119720624986", "scf1119720624951",
    "scf1119720624867", "scf1119720624356", "scf1119720624904", "scf1119720624945",
    "scf1119720623825", "scf1119720624018", "scf1119720625133", "scf1119720625168",
    "scf1119720623862", "scf1119720623529", "scf1119720624915", "scf1119720625161",
    "scf1119720624903", "scf1119720624779", "scf1119720623341", "scf1119720623344"
  };
  const char* const * const forced_end = forced_ids + sizeof(forced_ids)/sizeof(forced_ids[0]);

  return std::find(forced_ids, forced_end, id) != forced_end;
}

//
// Program entry point.
//
//  1. Parses the command line (ParseArgs fills the OPT_* globals).
//  2. Reads the optical map (one "size sd" pair per piece) and, when
//     OPT_CircularFlag is 1, appends a second copy of it.
//  3. Reads the sequence file ("id length num_sites site...") and orders the
//     sequences by decreasing number of sites.
//  4. For every sequence with more than two pieces, calls match() on the
//     forward and reversed piece lists, optionally estimates a permutation
//     based p-value, relaxes current_sd up to SDMax when the match is
//     rejected, and then either omits the sequence, reports it together with
//     similar alternatives (".match"), or places it uniquely
//     (".unique_match").
//  5. Writes counters and per-piece placement marks to ".match-summary".
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE when an input file cannot be opened
// or the optical map contains no pieces.
//
int main(int argc, char ** argv)
{
  ParseArgs (argc, argv);

  ifstream map_file(OPT_OpticalMap.c_str());
  if(!map_file) {

    cerr << "Error: cannot open optical map file '" << OPT_OpticalMap << "'" << endl;
    return EXIT_FAILURE;
  }

  double size; double sd;
  vector<pair<int, int> > optical_pieces;

  //
  // Reading map data and translating sizes from Kbp to bp.
  //
  while(map_file >> size >> sd)
    optical_pieces.push_back(pair<int, int>(int(size*1000), int(sd*1000)));

  int map_size = optical_pieces.size();
  map_file.close();

  //
  // Piece indices are reduced modulo map_size further down, so an empty map
  // cannot be processed.
  //
  if(map_size == 0) {

    cerr << "Error: optical map file '" << OPT_OpticalMap << "' contains no pieces" << endl;
    return EXIT_FAILURE;
  }

  //
  // Trick to handle circular optical maps: the map is appended to itself so
  // that a match may run past the last piece into the first ones.
  //
  if(OPT_CircularFlag == 1)
    for(int i = 0; i < map_size; i++)
      optical_pieces.push_back(optical_pieces[i]);

  map<string, vector<int> > sequence2sites;
  map<string, int> sequence2sizes;

  ifstream sequence_file(OPT_SilicoMap.c_str());
  if(!sequence_file) {

    cerr << "Error: cannot open sequence map file '" << OPT_SilicoMap << "'" << endl;
    return EXIT_FAILURE;
  }

  //
  // Each record is "id length num_sites" followed by num_sites positions.
  // Sequences with zero sites get a size entry but no entry in sequence2sites.
  //
  string id; int length; int num_sites;
  while(sequence_file >> id >> length >> num_sites) {

    sequence2sizes[id] = length;

    for(int i = 0; i < num_sites; i++) {

      // Stop at the first unreadable site instead of storing a bogus value;
      // the failed stream state also ends the outer loop.
      int site;
      if(!(sequence_file >> site))
        break;

      sequence2sites[id].push_back(site);
    }
  }
  sequence_file.close();

  vector<pair<string, vector<int> > > sequence_sites;
  for(map<string, vector<int> >::iterator it = sequence2sites.begin();
      it != sequence2sites.end(); it++) 
    sequence_sites.push_back(*it);

  // Sequences with the most sites are handled first.
  sort(sequence_sites.begin(), sequence_sites.end(), sequence_sites_lt);

  // One annotation string per optical piece, filled in as sequences are placed.
  vector<string> marks(map_size);

  int two_or_more = 0; int placed = 0; 
  int others_omit = 0; int sig_omit = 0;

  ofstream log_file((OPT_OutputPrefix + ".match-log").c_str());
  ofstream match_file((OPT_OutputPrefix + ".match").c_str());
  ofstream unique_match_file((OPT_OutputPrefix + ".unique_match").c_str());

  for(vector<pair<string, vector<int> > >::iterator it = sequence_sites.begin();
      it != sequence_sites.end(); it++) {

    sort(it->second.begin(), it->second.end());

    //
    // Turn sorted site positions into piece lengths: the leading piece, every
    // gap between consecutive sites that is at least small_sequence_size, and
    // the trailing piece up to the sequence length.
    //
    vector<int> sequence_pieces; sequence_pieces.push_back(it->second[0]);
    for(vector<int>::iterator it2 = it->second.begin()+1; it2 != it->second.end(); it2++) 
      if(*it2-*(it2-1) >= small_sequence_size)
        sequence_pieces.push_back(*it2-*(it2-1));
  
    sequence_pieces.push_back(sequence2sizes[it->first] - it->second[it->second.size()-1]);

    // Only the two end pieces are present: nothing to match against.
    if(sequence_pieces.size() <= 2)
      continue;

    two_or_more++;

    stringstream fragment_string;
    for(vector<int>::iterator it2 = it->second.begin(); it2 != it->second.end(); it2++)
      fragment_string << *it2 << " ";

    vector<int> reverse_sequence_pieces = sequence_pieces;
    reverse(reverse_sequence_pieces.begin(), reverse_sequence_pieces.end());

    // Short sequences start directly at the loosest threshold.
    if(sequence_pieces.size() <= 5)
      current_sd = SDMax;
    else
      current_sd = SDMin;

    //
    // Re-entry point: a rejected match jumps back here after incrementing
    // current_sd, which re-runs everything below with fresh local state.
    //
  START:

    match_bonus = max_num_of_matches*pow(current_sd, 2);

    vector<match_result_t> others; 
    match_result_t result = match(sequence_pieces, optical_pieces, true, others, it->first, true);

    // The reverse orientation is only tried for sequences with more than 3
    // pieces; the lower-scoring orientation is kept as an alternative.
    if(sequence_pieces.size() > 3) {

      match_result_t rev_result = match(reverse_sequence_pieces, optical_pieces, true, others, it->first, false);
 
      if(rev_result.score > result.score) {
        others.push_back(result);
        result = rev_result;
      }
      else
        others.push_back(rev_result);
    }

    //
    // Permutation test: fraction of reorderings of the interior pieces whose
    // match scores at least as well as `result` at a different start index.
    //
    double tail_count = 0; 
    if(OPT_PT_threshold > 0) {

      vector<int> sequence_pieces_copy; vector<int> positions;
      for(int i = 1; i < sequence_pieces.size()-1; i++) 
        if(sequence_pieces[i] >= small_sequence_size) {
          sequence_pieces_copy.push_back(sequence_pieces[i]);
          positions.push_back(i);
        }

      if(sequence_pieces_copy.size() > 3) {

        // More than 5 pieces: 200 random shuffles. Otherwise every ordering is
        // enumerated with next_permutation, starting from the sorted one.
        // NOTE(review): the exhaustive case starts the counter at -2,
        // presumably to discount two of the enumerated orderings -- confirm.
        double sample_count = (sequence_pieces_copy.size() > 5 ? 0 : -2);
        sort(sequence_pieces_copy.begin(), sequence_pieces_copy.end());

        do {

          sample_count++;
          match_result_t permute_result = match(replace(sequence_pieces, positions, sequence_pieces_copy), 
                                                optical_pieces, false, others, "-1", true);
          match_result_t rev_permute_result = match(replace(reverse_sequence_pieces, positions, sequence_pieces_copy), 
                                                    optical_pieces, false, others, "-1", false);
          
          if((permute_result.score >= result.score &&
              permute_result.start_index != result.start_index) || 
             (rev_permute_result.score >= result.score &&
              rev_permute_result.start_index != result.start_index))
            tail_count++;

          if(sequence_pieces_copy.size() > 5)
            permute(sequence_pieces_copy);

        } while((sequence_pieces_copy.size() > 5 && sample_count < 200) ||
                (sequence_pieces_copy.size() <= 5 && 
                 next_permutation(sequence_pieces_copy.begin(), sequence_pieces_copy.end()))); 

        tail_count /= sample_count;
      }

      result.pval = tail_count;
    }

    log_file << "Matching with standard deviation threshold: " << current_sd << endl
             << result.get_string(sequence2sizes[result.id]);

    vector<match_result_t> similar;
    int alt_count = 0;

    //
    // Reject the match when the permutation p-value or the miss ratio is too
    // high. Retry with a looser threshold while one is available.
    //
    if((OPT_PT_threshold != 0 && tail_count > OPT_PT_threshold) ||
       double(result.misses)/double(2*result.matches + result.misses) > error_rate) {

      if(current_sd < SDMax) {
      
        current_sd++;
        goto START;
      }

      // Out of retries: drop the sequence unless it is on the forced list,
      // in which case it is placed without the alternatives check.
      if(!is_forced_placement(result.id)) {

        sig_omit++; log_file << endl; continue;
      }
      else
        goto FORCE;
    } 

    log_file << "{";

    sort(others.begin(), others.end());
    others.erase(unique(others.begin(), others.end()), others.end());

    //
    // Collect alternative placements that are comparable to the best one.
    //
    for(int i = 0; i < others.size(); i++) {

      // Alternatives starting in the duplicated half of the map are skipped.
      if(others[i].start_index >= map_size)
        continue;

      if(result.matches > 1 && others[i].matches > 1) {

        others[i].fval = betai((result.matches-1)/2.0, (others[i].matches-1)/2.0, 
                               (result.chi_square+2)/(result.chi_square + others[i].chi_square+4));
        
        if(others[i].misses != result.misses || others[i].start_index == result.start_index ||
           others[i].fval < OPT_FT_threshold)
          continue;
      }

      alt_count++; others[i].id = result.id; similar.push_back(others[i]);
      log_file << others[i].get_string(sequence2sizes[others[i].id]);
    }

    log_file << "}" << endl; 
    
    // Ambiguous (or single-match) placement: report every candidate in the
    // ".match" file and do not place the sequence.
    if(alt_count != 0 || result.matches <= 1) {

      similar.push_back(result); 

      for(int i = 0; i < similar.size(); i++)
        match_file << similar[i].get_string(sequence2sizes[similar[i].id])
                   << fragment_string.str() << endl;

      others_omit++; log_file << endl; continue;
    }

  FORCE: 

    unique_match_file << result.get_string(sequence2sizes[result.id])
                      << fragment_string.str() << endl;
    
    // Both marks are wrapped into [0, map_size): indices may point into the
    // duplicated half of a circular map, and start_index+1 may equal map_size.
    marks[(result.start_index+1)%map_size] += (" [" + it->first + " ");
    marks[(result.end_index-1)%map_size] += (" " + it->first + "] ");
    placed++;

    //
    // Filtering out match regions!!!
    // The second field of every covered piece is zeroed, in both copies of
    // the map when it is circular.
    //
    for(int i = result.start_index+1; i <= result.end_index-1; i++) {

      optical_pieces[i%map_size].second = 0;
      if(OPT_CircularFlag)
        optical_pieces[map_size + i%map_size].second = 0;
    }

    log_file << endl;
  }

  log_file.close();
  match_file.close();
  unique_match_file.close();

  ofstream summary_file((OPT_OutputPrefix + ".match-summary").c_str());
  summary_file << "Total: " << sequence_sites.size() << " >1: " << two_or_more 
               << " Placed: " << placed << " Others-omit: " << others_omit 
               << " Sig-omit: " << sig_omit << endl;
 
  for(int i = 0; i < map_size; i++) 
    summary_file << i << ": " << marks[i];

  summary_file << endl;
  summary_file.close();

  return EXIT_SUCCESS;
}
