1. use strict;
2. use Data::Dumper;
3. use Carp;
4.
5. #
6. # This is a SAS Component
7. #
8.
9.
10. =head1 svr_all_features Genome Type
11.
12. Get a list of Feature IDs for all features of a given type in a given genome
13. or a list of genomes.
14.
15. If a genome ID is specified in the command line, there is no input. Otherwise,
16. this script takes as input a tab-delimited file with genome IDs at the end of
17. each line.
18.
19. The output is a file of feature IDs, one ID per line.
20.
21. ------
22. Example: svr_all_features 3702.1 peg | svr_function_of
23.
24. would produce a 2-column table. The first column would contain
25. PEG IDs for genes occurring in genome 3702.1, and the second
26. would contain the functions of those genes.
27. ------
28.
29. =head2 Command-Line Options
30.
31. =over 4
32.
33. =item Genome
34.
35. A genome that is in the SEED. The ID must be a valid SEED genome ID of the
36. form /^\d+\.\d+$/ (i.e., of the form xxxx.yyy)
37.
38. =item Type
39.
40. The type of the features sought (e.g., peg or rna)
41.
42. =back
43.
44. =head2 Output Format
45.
46. The standard output is a file where each line just contains a feature ID.
47.
48. =cut
49.
50.
51. use SeedEnv;
52. my $sapObject = SAPserver->new();
53.
54. my $usage = "usage: svr_all_features [Genome] Type";
55.
56. my($genome, $type);
57.
58. if (@ARGV == 1)
59. {
60. $type = shift;
61. }
62. elsif (@ARGV == 2)
63. {
64. $genome = shift;
65. $type = shift;
66.
67. process_genomes($type, [$genome]);
68. exit;
69. }
70. else
71. {
72. die $usage;
73. }
74.
75. #
76. # If we get here, we are reading genomes from STDIN. Process in batches
77. # to make this code friendlier on the servers for large input files.
78. #
79.
80. my $batch_size = 10;
81.
82. my @genomes;
83.
84. while ()
85. {
86. chomp;
87. my @cols = split(/\t/);
88. my $genome = $cols[-1];
89. push(@genomes, $genome);
90. if (@genomes >= $batch_size)
91. {
92. process_genomes($type, \@genomes);
93. @genomes = ();
94. }
95. }
96. if (@genomes)
97. {
98. process_genomes($type, \@genomes);
99. }
100.
101. sub process_genomes
102. {
103. my($type, $genomes) = @_;
104.
105. # print STDERR "Request @$genomes\n";
106. my $fidHash = $sapObject->all_features(-ids => $genomes, -type => $type);
107.
108. foreach my $gid (@$genomes)
109. {
110. foreach my $fid (sort { &SeedUtils::by_fig_id($a, $b) } @{$fidHash->{$gid}} )
111. {
112. print "$fid\n";
113. }
114. }
115. }
116.