-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathMakeRdpTaxonomy.pl
79 lines (60 loc) · 1.62 KB
/
MakeRdpTaxonomy.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/perl -w
=head1 NAME
MakeRdpTaxonomy.pl - Extracts taxonomic information from an RDP (http://rdp.cme.msu.edu/) file in genbank format and outputs a tab-separated file with sequence id in first column and taxonomy in second column (with the different taxonoimc levels separated by semicolon).
=head1 USAGE
perl MakeRdpTaxonomy.pl -i RDP_GENBANK_FILE -o OUTPUT_FILE [-h]
=head1 POSITIONAL ARGUMENTS
-i RDP_GENBANK_FILE Specify RDP file in genbank format
-o OUTPUT_FILE Specify output file name (overwrites existing file with same name)
=head1 OPIONAL ARGUMENTS
-h Print this help message
=cut
use Getopt::Long;
$infile = undef;
$outfile = undef;
&GetOptions('i=s' => \$infile, 'o=s' => \$outfile, 'h!' => \$help);
if (!$infile or !$outfile or $help) {
system ('perldoc', $0);
exit;
}
#####
print"\nRunning MakeRdpTaxonomy\n";
&extract_taxonomy_from_genbank;
print"Finnished MakeRdpTaxonomy succesfully\n\n";
#####
sub extract_taxonomy_from_genbank {
$in = 0;
open (INFILE, "$infile") || die ("Can't open $infile\n");
open (OUT, ">$outfile");
while (<INFILE>) {
chomp;
$row = $_;
@fields = split(/\s+/, $row);
if ($fields[0] eq "LOCUS") {
$rdp_id = $fields[1];
}
if ($fields[0] eq "REFERENCE") {
$in = 0;
}
if ($fields[0] eq "COMMENT") {
$in = 0;
}
if ($in == 1) {
$row =~ tr/ //d;
$tax = "$tax$row";
}
if (@fields > 1) {
if ($fields[1] eq "ORGANISM") {
$in = 1;
$tax = "";
}
}
if ($fields[0] eq "//") {
$tax =~ m/(Root.*[^\.]\.)/;
$tax = $1;
print OUT "$rdp_id\t$tax\n";
}
}
close (INFILE);
close(OUT);
}