#!/usr/bin/perl
# Join multiple ABySS distance estimate files.
# Written by Shaun Jackman <sjackman@bcgsc.ca>.
use strict;
use Getopt::Long;
use Pod::Usage;

# Parse the command line.  --help prints brief usage and --man the full
# manual page.  If option parsing fails (e.g. an unrecognized option),
# print the synopsis and exit with status 2 instead of carrying on and
# processing the remaining arguments.
my %opt;
GetOptions(\%opt, qw'help man') or pod2usage(2);
pod2usage(-verbose => 1) if $opt{help};
pod2usage(-verbose => 2) if $opt{man};

# Tables filled while reading the input and consumed by the output stage:
#   %id  set of every record ID seen (the first field of a line)
#   %a   $a{$id}{$to} = "$sd $d $n" for estimates left of the ';'
#   %b   the same, for estimates right of the ';'
# When the same ($id, $to) pair occurs more than once, the estimate with
# the smallest $sd (fourth comma-separated field) is the one kept.
my (%id, %a, %b);

while (my $line = <>) {
	chomp $line;
	my ($id, $rest) = split ' ', $line, 2;

	# A blank line has no ID; without this guard it would register an
	# empty-string ID and show up as a bogus " ;" record in the output.
	next unless defined $id;
	$id{$id} = 1;

	# Everything before the first ';' feeds %a, everything after it %b.
	# Either half may be absent.
	my @halves = split ';', (defined $rest ? $rest : ''), 2;

	for my $side ([\%a, $halves[0]], [\%b, $halves[1]]) {
		my ($best, $estimates) = @$side;
		next unless defined $estimates;

		for my $estimate (split ' ', $estimates) {
			my ($to, $d, $n, $sd) = split ',', $estimate;

			if (exists $best->{$id}{$to}) {
				# The stored value is the packed string "$sd $d $n";
				# pull out its sd field explicitly rather than
				# relying on numification of the whole string.
				my ($best_sd) = split ' ', $best->{$id}{$to};
				next unless $sd < $best_sd;
			}
			$best->{$id}{$to} = "$sd $d $n";
		}
	}
}

# Write one line per record ID in the form
#   <id> <to,d,n,sd>... ; <to,d,n,sd>...
# with the estimates from %a before the ';' and those from %b after it.
#
# Keys are ordered numerically, falling back to string order so that keys
# which compare numerically equal (for instance ones differing only in a
# non-numeric suffix) are emitted in a stable order rather than in hash
# order, which can change from run to run.
my $by_key = sub { $a <=> $b or $a cmp $b };

for my $id (sort $by_key keys %id) {
	my @halves;
	for my $best (\%a, \%b) {
		# An ID may have no estimates on one side of the ';'.
		my $estimates = $best->{$id} || {};
		push @halves, join '', map {
			# Stored values are packed as "$sd $d $n".
			my ($sd, $d, $n) = split ' ', $estimates->{$_};
			" $_,$d,$n,$sd";
		} sort $by_key keys %$estimates;
	}
	print "$id$halves[0] ;$halves[1]\n";
}

=pod

=head1 NAME

abyss-joindist -
Join multiple ABySS distance estimate files.

=head1 SYNOPSIS

B<abyss-joindist> F<in.dist>... >F<out.dist>

=head1 AUTHOR

Written by Shaun Jackman.

=head1 REPORTING BUGS

Report bugs to <abyss-users@googlegroups.com>.

=head1 COPYRIGHT

Copyright 2009 Canada's Michael Smith Genome Science Centre

=head1 SEE ALSO

L<ABYSS(1)>

http://www.bcgsc.ca/platform/bioinfo/software/abyss
