1.	use strict;
2.	use Data::Dumper;
3.	use Carp;
4.	
5.	#
6.	# This is a SAS Component
7.	#
8.	
9.	=head1 svr_neighbors_of
10.	
11.	    svr_neighbors_of neighbor_ids.tbl
12.	
13.	Get neighbors of protein-encoding genes (PEGs)
14.	
15.	The standard input should be a tab-separated table (i.e., each line 
16.	is a tab-separated set of fields).  Normally, the last field in each
17.	line would contain the PEG for which a list of neighbors is being requested.
18.	If some other column contains the feature IDs, use
19.	
20.	    -c n
21.	
22.	where n is the column (from 1) that contains the PEG in each case.  You are
23.	allowed to specify the number of neighbors you want using 
24.	
25.	     -N n 
26.	
27.	where n in this case is the number to each side of a given PEG.  That
28.	is, a value of 5 would lead to a result composed of the 5 genes to the
29.	left and the 5 genes to the right. 
30.	
31.	This is a pipe command. The input is taken from the standard input, and the
32.	output is to the standard output.
33.	
34.	=head2 Command-Line Options
35.	
36.	=over 4
37.	
38.	=item -c Column
39.	
40.	This is used only if the column containing PEGs is not the last.
41.	
42.	=item -N number-neighbors
43.	
44.	This value is the number to the left and the number to the right of the given gene.
45.	Hence, you would normally get twice this number as the detected set of neighbors.  You
46.	may get less if the given gene occurs near the beginning or end of a contig.
47.	
48.	=head2 Output Format
49.	
50.	The standard output is a tab-delimited file. It consists of the input
51.	file with an extra column added (i.e., the column containing the comma-separated
52.	list of neighbors).
53.	
54.	=back
55.	
56.	=cut
57.	
58.	use SeedUtils;
59.	use SAPserver;
60.	my $sapObject = SAPserver->new();
61.	
62.	my $usage = "usage: svr_neighbors_of [-c column] N";
63.	
64.	my $column;
65.	my $N;
66.	while ($ARGV[0] && ($ARGV[0] =~ /^-/))
67.	{
68.	    $_ = shift @ARGV;
69.	    if    ($_ =~ s/^-c//) { $column       = ($_ || shift @ARGV) }
70.	    else                  { die "Bad Flag: $_" }
71.	}
72.	($N = shift @ARGV) || die $usage;
73.	
74.	my @lines = map { chomp; [split(/\t/,$_)] } ;
75.	if (! $column)  { $column = @{$lines[0]} }
76.	my @pegs = map { $_->[$column-1] } @lines;
77.	
78.	my $neighbors = &get_neighbors(\@pegs,$N);
79.	foreach $_ (@lines)
80.	{
81.	    my $neigh = join(",",@{$neighbors->{$_->[$column-1]}});
82.	    print join("\t",(@$_,$neigh)),"\n";
83.	}
84.	
85.	sub get_neighbors {
86.	    my($pegs,$N) = @_;
87.	
88.	    my $neighbors = {};
89.	    my $pegsH = {map { $_ => 1 } @$pegs};
90.	    my %genomes	 = map { $_ =~ /^fig\|(\d+\.\d+)/; $1 => 1 } @$pegs;
91.	    my @genomesL = keys(%genomes);
92.	
93.	    my %by_genome;
94.	    foreach my $genome (@genomesL)
95.	    {
96.		my $pegHash  = $sapObject->all_features(-ids => $genome, -type => 'peg');
97.		my $all_pegs = $pegHash->{$genome};
98.		my $locHash  = $sapObject->fid_locations(-ids => $all_pegs);
99.		my @peg_loc_tuples_in_genome =
100.		    sort { &compare_locs($a->[1],$b->[1]) }
101.		    map { [$_,$locHash->{$_}] }
102.		    keys(%$locHash);
103.		&set_neighbors(\@peg_loc_tuples_in_genome,$pegsH,$neighbors,$N);
104.	    }
105.	    return $neighbors;
106.	}
107.	
108.	sub compare_locs {
109.	    my($loc1,$loc2) = @_;
110.	
111.	    my($contig1,$min1,$max1) = &SeedUtils::boundaries_of($loc1);
112.	    my($contig2,$min2,$max2) = &SeedUtils::boundaries_of($loc2);
113.	    return (($contig1 cmp $contig2) or (($min1+$max1) <=> ($min2+$max2)));
114.	}
115.	
116.	sub set_neighbors {
117.	    my($peg_loc_tuples,$pegsH,$neighborsH,$N) = @_;
118.	
119.	    my $i;
120.	    for ($i=0; ($i < @$peg_loc_tuples); $i++)
121.	    {
122.		next if (! $pegsH->{$peg_loc_tuples->[$i]->[0]});
123.	
124.		my($contigI,$minI,$maxI) = &SeedUtils::boundaries_of($peg_loc_tuples->[$i]->[1]);
125.		$contigI || confess "BAD";
126.		my $neighbors = [];
127.		my $j = $i-1;
128.		while (($j >= 0) && ($j >= ($i-$N)) && 
129.		       &same_contig($peg_loc_tuples->[$j]->[1],$contigI))
130.		{
131.		    $j--;
132.		}
133.		$j++;
134.		while ($j < $i) { push(@$neighbors,$peg_loc_tuples->[$j]->[0]); $j++ }
135.	
136.		$j = $i+1;
137.		while (($j < @$peg_loc_tuples) && ($j <= ($i+$N)) && 
138.		       &same_contig($peg_loc_tuples->[$j]->[1],$contigI))
139.		{
140.		    push(@$neighbors,$peg_loc_tuples->[$j]->[0]);
141.		    $j++;
142.		}
143.		$neighborsH->{$peg_loc_tuples->[$i]->[0]} = $neighbors;
144.	    }
145.	}
146.	
147.	sub same_contig {
148.	    my($entry,$contig) = @_;
149.	
150.	    $contig || confess "BAD";
151.	    my($contig1,$minI,$maxI) = &SeedUtils::boundaries_of($entry);
152.	    return ($contig eq $contig1);
153.	}