1. ######################################################################## 2. use strict; 3. use Data::Dumper; 4. use Carp; 5. $| = 1; 6. 7. 8. # 9. # This is a SAS Component 10. # 11. 12. =head1 svr_neighbors_of 13. 14. svr_neighbors_ofneighbor_ids.tbl 15. 16. Get neighbors of protein-encoding genes (PEGs) 17. 18. The standard input should be a tab-separated table (i.e., each line 19. is a tab-separated set of fields). Normally, the last field in each 20. line would contain the PEG for which a list of neighbors is being requested. 21. If some other column contains the feature IDs, use 22. 23. -c n 24. 25. where n is the column (from 1) that contains the PEG in each case. You are 26. allowed to specify the number of neighbors you want using 27. 28. -N n 29. 30. where n in this case is the number to each side of a given PEG. That 31. is, a value of 5 would lead to a result composed of the 5 genes to the 32. left and the 5 genes to the right. 33. 34. This is a pipe command. The input is taken from the standard input, and the 35. output is to the standard output. 36. 37. =head2 Command-Line Options 38. 39. =over 4 40. 41. =item -c Column 42. 43. This is used only if the column containing PEGs is not the last. 44. 45. =item -N number-neighbors 46. 47. This value is the number to the left and the number to the right of the given gene. 48. Hence, you would normally get twice this number as the detected set of neighbors. You 49. may get less if the given gene occurs near the beginning or end of a contig. 50. 51. =head2 Output Format 52. 53. The standard output is a tab-delimited file. It consists of the input 54. file with an extra column added (i.e., the column containing the comma-separated 55. list of neighbors). 56. 57. =back 58. 59. =cut 60. 61. use SeedUtils; 62. use SAPserver; 63. my $sapObject = SAPserver->new(); 64. 65. my $usage = "usage: svr_neighbors_of [-c column] N"; 66. 67. my $column; 68. my $N; 69. while ($ARGV[0] && ($ARGV[0] =~ /^-/)) 70. { 71. $_ = shift @ARGV; 72. if ($_ =~ s/^-c//) { $column = ($_ || shift @ARGV) } 73. else { die "Bad Flag: $_" } 74. } 75. ($N = shift @ARGV) || die $usage; 76. 77. my @lines = map { chomp; [split(/\t/,$_)] } ; 78. if (! $column) { $column = @{$lines[0]} } 79. my @pegs = map { $_->[$column-1] } @lines; 80. 81. my $neighbors = &get_neighbors(\@pegs,$N); 82. foreach $_ (@lines) 83. { 84. if ($column && $_->[$column-1] && $neighbors->{$_->[$column-1]}) 85. { 86. my $neigh = join(",",@{$neighbors->{$_->[$column-1]}}); 87. print join("\t",(@$_,$neigh)),"\n"; 88. } 89. } 90. 91. sub get_neighbors { 92. my($pegs,$N) = @_; 93. 94. my $neighbors = {}; 95. my $pegsH = {map { $_ => 1 } @$pegs}; 96. my %genomes = map { $_ =~ /^fig\|(\d+\.\d+)/; $1 => 1 } @$pegs; 97. my @genomesL = keys(%genomes); 98. 99. my %by_genome; 100. foreach my $genome (@genomesL) 101. { 102. my $pegHash = $sapObject->all_features(-ids => $genome, -type => 'peg'); 103. my $all_pegs = $pegHash->{$genome}; 104. my $locHash = $sapObject->fid_locations(-ids => $all_pegs); 105. my @peg_loc_tuples_in_genome = 106. sort { &compare_locs($a->[1],$b->[1]) } 107. map { [$_,$locHash->{$_}] } 108. keys(%$locHash); 109. &set_neighbors(\@peg_loc_tuples_in_genome,$pegsH,$neighbors,$N); 110. } 111. return $neighbors; 112. } 113. 114. sub compare_locs { 115. my($loc1,$loc2) = @_; 116. 117. my($contig1,$min1,$max1) = &SeedUtils::boundaries_of($loc1); 118. my($contig2,$min2,$max2) = &SeedUtils::boundaries_of($loc2); 119. return (($contig1 cmp $contig2) or (($min1+$max1) <=> ($min2+$max2))); 120. } 121. 122. sub set_neighbors { 123. my($peg_loc_tuples,$pegsH,$neighborsH,$N) = @_; 124. 125. my $i; 126. for ($i=0; ($i < @$peg_loc_tuples); $i++) 127. { 128. next if (! $pegsH->{$peg_loc_tuples->[$i]->[0]}); 129. 130. my($contigI,$minI,$maxI) = &SeedUtils::boundaries_of($peg_loc_tuples->[$i]->[1]); 131. $contigI || confess "BAD"; 132. my $neighbors = []; 133. my $j = $i-1; 134. while (($j >= 0) && ($j >= ($i-$N)) && 135. &same_contig($peg_loc_tuples->[$j]->[1],$contigI)) 136. { 137. $j--; 138. } 139. $j++; 140. while ($j < $i) { push(@$neighbors,$peg_loc_tuples->[$j]->[0]); $j++ } 141. 142. $j = $i+1; 143. while (($j < @$peg_loc_tuples) && ($j <= ($i+$N)) && 144. &same_contig($peg_loc_tuples->[$j]->[1],$contigI)) 145. { 146. push(@$neighbors,$peg_loc_tuples->[$j]->[0]); 147. $j++; 148. } 149. $neighborsH->{$peg_loc_tuples->[$i]->[0]} = $neighbors; 150. } 151. } 152. 153. sub same_contig { 154. my($entry,$contig) = @_; 155. 156. $contig || confess "BAD"; 157. my($contig1,$minI,$maxI) = &SeedUtils::boundaries_of($entry); 158. return ($contig eq $contig1); 159. }