#!/usr/bin/perl

# Exactly one command-line argument (the ACE file) is required; with any
# other argument count, print the usage summary and stop.
if (@ARGV != 1) {

    # The output files named here are the ones the script really creates:
    # <pref>.dot, <pref>.ps, <pref>.graph and <pref>.log.
    print("\nUSAGE: get_graph.pl <acefile>\n\n" .
          "acefile\t= Acefile <pref>.ace containing the assembly \n\n" .
          "The contig graph is written to <pref>.dot (graphviz format) and <pref>.ps\n" .
          "The supporting information in the form of contig links and coverage statistics will\n" .
          "be written to the file <pref>.graph; links that cannot be parsed are reported in <pref>.log\n\n");

    exit;
}

# ---------------------------------------------------------------------------
# Step 1: parse the ACE file.
#
# Results are left in package globals because the later steps read them:
#   %length            contig number => bases in its CO record ('*' removed)
#   %coverage          contig number => read bases / contig length
#   %connect           "<read> <ctgA> <ctgB>" => "<ctg>:B " / "<ctg>:E " entries
#   $average_coverage  mean coverage of the contigs longer than $large_size
# ---------------------------------------------------------------------------
($acefile) = @ARGV;

# Contigs longer than this many bases are "large"; only large contigs
# contribute to the average coverage.
$large_size = 10000;

open(my $ace_fh, '<', $acefile)
    or die "ERROR: Cannot open acefile '$acefile': $!\n";

print "Step 1: Parsing the Acefile.\n";

my $bases        = 0;    # read bases accumulated since the last CO record
my $coverage_sum = 0;    # summed coverage of the large contigs
my $ctg_count    = 0;    # number of large contigs in $coverage_sum
my $name;                # number of the most recent CO record
my $last_name;           # contig that $bases belongs to

# Store the coverage of one finished contig and, if the contig is large,
# add it to the running average.  Contigs without a usable length are
# skipped so that the division can never fail.
my $record_coverage = sub {
    my ($ctg, $read_bases) = @_;

    return unless (defined $ctg && $length{$ctg});

    $coverage{$ctg} = $read_bases / $length{$ctg};
    if ($length{$ctg} > $large_size) {
        $coverage_sum += $coverage{$ctg};
        $ctg_count++;
    }
};

while (my $line = <$ace_fh>) {

    # CO record: contig header followed by the consensus, up to the BQ line.
    if ($line =~ m/^CO contig(\d+)/) {

        $name = 0 + $1;

        my $ctg_length = 0;
        while ($line = <$ace_fh>) {
            last if ($line =~ m/^BQ/);
            chomp $line;
            $line =~ s/\*//g;              # '*' characters are not counted
            $ctg_length += length($line);
        }
        $length{$name} = $ctg_length;

        # The reads seen since the previous CO record belong to that contig.
        $record_coverage->($last_name, $bases) if ($bases != 0);

        $bases     = 0;
        $last_name = $name;

        last unless (defined $line);       # file ended inside the consensus
    }

    # AF record of a read whose name carries a "to<N>" / "fm<N>" marker.
    if ($line =~ m/^AF (\S+(to|fm)\d+) \w (\S+)/) {

        my ($read_name, $offset) = ($1, $3);
        $read_name =~ s/(\.|\_).+//g;      # keep the part before the first '.' or '_'

        # An offset of at most 10 is tagged 'B', anything larger 'E'
        # (presumably Begin / End of the contig -- TODO confirm).
        my $end_tag = ($offset <= 10 ? "$name:B " : "$name:E ");

        while ($line =~ m/(to|fm)(\d+)/g) {

            # Normalise and sort numerically so that both contigs of a link
            # build the same key even if one number is zero padded.
            my $other = 0 + $2;
            my $key   = "$read_name " . join(" ", sort { $a <=> $b } ($name, $other));
            $connect{$key} .= $end_tag;
        }
    }

    # RD record: read header followed by the read sequence, up to the QA line.
    if ($line =~ m/^RD (\w+)(\.\d+\-\d+)?/) {

        my $range = $2;    # undef when the name has no ".<from>-<to>" suffix

        my $read = "";
        while ($line = <$ace_fh>) {
            last if ($line =~ m/^QA/);
            chomp $line;
            $read .= $line;
        }

        if (defined $range) {

            my ($start, $end) = ($range =~ m/(\d+)\-(\d+)/);
            ($start, $end) = ($end, $start) if ($start > $end);

            # NOTE(review): $start is used as a 0-based offset here; if the
            # range in the read name is 1-based this is off by one -- confirm.
            $read = substr($read, $start, $end - $start + 1);
            $read = "" unless (defined $read);   # range beyond the sequence
        }

        $read =~ s/\*//g;
        $bases += length($read);
    }
}
close($ace_fh);

die "ERROR: No contigs found in '$acefile'\n" unless (%length);

# The last contig is not followed by another CO record; close it out here.
$record_coverage->($last_name, $bases);

if ($ctg_count > 0) {
    $average_coverage = $coverage_sum / $ctg_count;
}
else {
    # No contig is longer than $large_size: fall back to the mean over all
    # contigs with a coverage value instead of dividing by zero.
    my ($sum, $n) = (0, 0);
    foreach my $cov (values %coverage) {
        $sum += $cov;
        $n++;
    }
    $average_coverage = ($n > 0 ? $sum / $n : 0);
    warn "WARNING: no contig is longer than $large_size bases; " .
         "using the average coverage of all contigs instead\n";
}

# ---------------------------------------------------------------------------
# Step 2: turn the per-read entries of %connect into link counts.
#
# Each usable %connect value holds exactly two "<contig>:<B|E>" entries.  The
# pair is ordered by contig number and counted in one of four hashes keyed by
# "contigNNNNN -- contigMMMMM":
#   %normal  E -> B      %anti   B -> E
#   %innie   E -> E      %outie  B -> B
# %adjacent counts, per contig, the links it takes part in.
# ---------------------------------------------------------------------------
print "Step 2: Collecting the link information.\n";

# Everything before the last ".ace" is the output prefix; when the file name
# contains no ".ace" the whole name is used, so the prefix is never empty.
$prefix = ($acefile =~ m/(.*)\.ace/) ? $1 : $acefile;

open(my $log_fh, '>', "$prefix.log")
    or die "ERROR: Cannot write '$prefix.log': $!\n";

# Orientation pair (lower contig first) => hash that counts this link type.
my %links_for = (
    "EB" => \%normal,
    "BE" => \%anti,
    "EE" => \%innie,
    "BB" => \%outie,
);

while (my ($id, $loc) = each %connect) {

    my @ends = split(' ', $loc);

    if (@ends != 2) {

        print {$log_fh} "ERROR: Cannot parse these links: $id, @ends\n";
        next;
    }

    my ($ctg1, $or1) = split(":", $ends[0]);
    my ($ctg2, $or2) = split(":", $ends[1]);

    # Always list the lower-numbered contig first.
    ($ctg1, $or1, $ctg2, $or2) = ($ctg2, $or2, $ctg1, $or1) if ($ctg1 > $ctg2);

    $ctg1 = sprintf("contig%05d", $ctg1);
    $ctg2 = sprintf("contig%05d", $ctg2);

    my $counts = $links_for{"$or1$or2"};
    $counts->{"$ctg1 -- $ctg2"}++ if ($counts);

    $adjacent{$ctg1}++;
    $adjacent{$ctg2}++;
}
close($log_fh) or warn "WARNING: Problem closing '$prefix.log': $!\n";

# ---------------------------------------------------------------------------
# Step 3: write the contig graph.
#
#   <prefix>.dot    Graphviz description: one node per contig that has links
#                   or is longer than $large_size, one edge per linked pair.
#   <prefix>.graph  Tab-separated text: one line per contig (running index,
#                   name, length, coverage) followed by one "C" line per
#                   linked contig pair.
# ---------------------------------------------------------------------------
printf "Step 3: Writing out the graph (Average Coverage = %.2f)\n", $average_coverage;

open(my $dot_fh, '>', "$prefix.dot")
    or die "ERROR: Cannot write '$prefix.dot': $!\n";
open(my $graph_fh, '>', "$prefix.graph")
    or die "ERROR: Cannot write '$prefix.graph': $!\n";

# NOTE(review): "coutrotate" is not a Graphviz attribute name I recognise
# (possibly "rotate" was meant); kept unchanged so the output stays the same.
print {$dot_fh} "graph ROOT {\nsize=\"8.5,11\"\nrankdir=LR\ncoutrotate=90\n";

my $count = 1;    # running index of the contig lines in the .graph file
foreach my $contig (sort { $a <=> $b } keys(%length)) {

    my $ctg = sprintf("contig%05d", $contig);
    my $cov = (defined $coverage{$contig} ? $coverage{$contig} : 0);

    if ($adjacent{$ctg} > 0 || $length{$contig} > $large_size) {

        my $fill = ($length{$contig} > $large_size
                    ? " color=lightblue, style=filled,"
                    : " color=black,");

        # Second label field: 1, or coverage relative to the average when the
        # contig is covered more than 1.5 times the average.
        my $copies = 1;
        if ($average_coverage > 0 && $cov > 1.5 * $average_coverage) {
            $copies = int($cov / $average_coverage + 1);
        }

        printf {$dot_fh} "%s [shape=house, orientation=270,%s label=\"%s, %s, %.0f, %s\"]\n",
            $ctg, $fill, $ctg, $copies, $cov, $length{$contig};
    }

    printf {$graph_fh} "%d\t%s\t%s\t%.1f\n", $count++, $ctg, $length{$contig}, $cov;
}

# One row per link type:
#   link counts, DOT arrowhead, DOT arrowtail, end of first contig, end of second contig
my @edge_classes = (
    [ \%normal, "vee",  "crow", "3'", "5'" ],
    [ \%anti,   "crow", "vee",  "5'", "3'" ],
    [ \%innie,  "crow", "crow", "3'", "3'" ],
    [ \%outie,  "vee",  "vee",  "5'", "5'" ],
);

foreach my $class (@edge_classes) {

    my ($links, $head, $tail, $end1, $end2) = @$class;

    foreach my $pair (keys(%$links)) {

        print {$dot_fh} "$pair [label = \"$links->{$pair}\", arrowhead=$head, arrowtail=$tail]\n";

        # Plain contig numbers for the .graph file.  Converting numerically
        # (rather than stripping "contig0+") also handles contig 0 and
        # numbers with more than five digits.
        my ($num1, $num2) = map { 0 + $_ } ($pair =~ m/contig(\d+) -- contig(\d+)/);

        printf {$graph_fh} "C\t%s\t%s\t%s\t%s\t%d\n",
            $num1, $end1, $num2, $end2, $links->{$pair};
    }
}

print {$dot_fh} "}\n";
close($dot_fh)   or die "ERROR: Problem closing '$prefix.dot': $!\n";
close($graph_fh) or die "ERROR: Problem closing '$prefix.graph': $!\n";

# Render <prefix>.dot to PostScript with Graphviz.  The list form of system()
# bypasses the shell, so prefixes containing spaces or shell metacharacters
# are passed through unchanged; "-o" replaces the shell redirection.  A
# failure is reported but is not fatal, because the .dot and .graph files
# have already been written.
if (system("dot", "-Tps", "$prefix.dot", "-o", "$prefix.ps") != 0) {

    my $reason = ($? == -1 ? "could not run dot: $!"
                           : "dot exited with status " . ($? >> 8));
    warn "WARNING: '$prefix.ps' was not created ($reason)\n";
}
