1.	########################################################################
2.	use strict;
3.	use Data::Dumper;
4.	use Carp;
5.	$| = 1;
6.	
7.	
8.	#
9.	# This is a SAS Component
10.	#
11.	
12.	=head1 svr_neighbors_of
13.	
14.	    svr_neighbors_of neighbor_ids.tbl
15.	
16.	Get neighbors of protein-encoding genes (PEGs)
17.	
18.	The standard input should be a tab-separated table (i.e., each line 
19.	is a tab-separated set of fields).  Normally, the last field in each
20.	line would contain the PEG for which a list of neighbors is being requested.
21.	If some other column contains the feature IDs, use
22.	
23.	    -c n
24.	
25.	where n is the column (from 1) that contains the PEG in each case.  You are
26.	allowed to specify the number of neighbors you want using 
27.	
28.	     -N n 
29.	
30.	where n in this case is the number to each side of a given PEG.  That
31.	is, a value of 5 would lead to a result composed of the 5 genes to the
32.	left and the 5 genes to the right. 
33.	
34.	This is a pipe command. The input is taken from the standard input, and the
35.	output is to the standard output.
36.	
37.	=head2 Command-Line Options
38.	
39.	=over 4
40.	
41.	=item -c Column
42.	
43.	This is used only if the column containing PEGs is not the last.
44.	
45.	=item -N number-neighbors
46.	
47.	This value is the number to the left and the number to the right of the given gene.
48.	Hence, you would normally get twice this number as the detected set of neighbors.  You
49.	may get less if the given gene occurs near the beginning or end of a contig.
50.	
51.	=head2 Output Format
52.	
53.	The standard output is a tab-delimited file. It consists of the input
54.	file with an extra column added (i.e., the column containing the comma-separated
55.	list of neighbors).
56.	
57.	=back
58.	
59.	=cut
60.	
61.	use SeedUtils;
62.	use SAPserver;
63.	my $sapObject = SAPserver->new();
64.	
65.	my $usage = "usage: svr_neighbors_of [-c column] N";
66.	
67.	my $column;
68.	my $N;
69.	while ($ARGV[0] && ($ARGV[0] =~ /^-/))
70.	{
71.	    $_ = shift @ARGV;
72.	    if    ($_ =~ s/^-c//) { $column       = ($_ || shift @ARGV) }
73.	    else                  { die "Bad Flag: $_" }
74.	}
75.	($N = shift @ARGV) || die $usage;
76.	
77.	my @lines = map { chomp; [split(/\t/,$_)] } ;
78.	if (! $column)  { $column = @{$lines[0]} }
79.	my @pegs = map { $_->[$column-1] } @lines;
80.	
81.	my $neighbors = &get_neighbors(\@pegs,$N);
82.	foreach $_ (@lines)
83.	{
84.	    if ($column && $_->[$column-1] && $neighbors->{$_->[$column-1]})
85.	    {
86.		my $neigh = join(",",@{$neighbors->{$_->[$column-1]}});
87.		print join("\t",(@$_,$neigh)),"\n";
88.	    }
89.	}
90.	
91.	sub get_neighbors {
92.	    my($pegs,$N) = @_;
93.	
94.	    my $neighbors = {};
95.	    my $pegsH = {map { $_ => 1 } @$pegs};
96.	    my %genomes	 = map { $_ =~ /^fig\|(\d+\.\d+)/; $1 => 1 } @$pegs;
97.	    my @genomesL = keys(%genomes);
98.	
99.	    my %by_genome;
100.	    foreach my $genome (@genomesL)
101.	    {
102.		my $pegHash  = $sapObject->all_features(-ids => $genome, -type => 'peg');
103.		my $all_pegs = $pegHash->{$genome};
104.		my $locHash  = $sapObject->fid_locations(-ids => $all_pegs);
105.		my @peg_loc_tuples_in_genome =
106.		    sort { &compare_locs($a->[1],$b->[1]) }
107.		    map { [$_,$locHash->{$_}] }
108.		    keys(%$locHash);
109.		&set_neighbors(\@peg_loc_tuples_in_genome,$pegsH,$neighbors,$N);
110.	    }
111.	    return $neighbors;
112.	}
113.	
114.	sub compare_locs {
115.	    my($loc1,$loc2) = @_;
116.	
117.	    my($contig1,$min1,$max1) = &SeedUtils::boundaries_of($loc1);
118.	    my($contig2,$min2,$max2) = &SeedUtils::boundaries_of($loc2);
119.	    return (($contig1 cmp $contig2) or (($min1+$max1) <=> ($min2+$max2)));
120.	}
121.	
122.	sub set_neighbors {
123.	    my($peg_loc_tuples,$pegsH,$neighborsH,$N) = @_;
124.	
125.	    my $i;
126.	    for ($i=0; ($i < @$peg_loc_tuples); $i++)
127.	    {
128.		next if (! $pegsH->{$peg_loc_tuples->[$i]->[0]});
129.	
130.		my($contigI,$minI,$maxI) = &SeedUtils::boundaries_of($peg_loc_tuples->[$i]->[1]);
131.		$contigI || confess "BAD";
132.		my $neighbors = [];
133.		my $j = $i-1;
134.		while (($j >= 0) && ($j >= ($i-$N)) && 
135.		       &same_contig($peg_loc_tuples->[$j]->[1],$contigI))
136.		{
137.		    $j--;
138.		}
139.		$j++;
140.		while ($j < $i) { push(@$neighbors,$peg_loc_tuples->[$j]->[0]); $j++ }
141.	
142.		$j = $i+1;
143.		while (($j < @$peg_loc_tuples) && ($j <= ($i+$N)) && 
144.		       &same_contig($peg_loc_tuples->[$j]->[1],$contigI))
145.		{
146.		    push(@$neighbors,$peg_loc_tuples->[$j]->[0]);
147.		    $j++;
148.		}
149.		$neighborsH->{$peg_loc_tuples->[$i]->[0]} = $neighbors;
150.	    }
151.	}
152.	
153.	sub same_contig {
154.	    my($entry,$contig) = @_;
155.	
156.	    $contig || confess "BAD";
157.	    my($contig1,$minI,$maxI) = &SeedUtils::boundaries_of($entry);
158.	    return ($contig eq $contig1);
159.	}