-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbagel4_blast.pm
95 lines (79 loc) · 2.76 KB
/
bagel4_blast.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package bagel4_blast ;
##############################################################
##
## Anne de Jong
## University of Groningen
## the Netherlands
## anne.de.jong@rug.nl
##
##############################################################
##
##
## 2017 July
## Package for parsing blast results for BAGEL4
##
use strict ;
use warnings ;
use lib "/usr/molgentools/lib";
use anne_files;
use LWP::Simple;
BEGIN {
use Exporter ();
}
# ------------------------------------------------------------------ functions ----------------------------------------------------------------
sub get_uniprot_features {
my $uniprotXMLfile = shift ; # full path to the uniprot xml filename
my @lines = anne_files::read_lines($uniprotXMLfile) ;
my $xml = join '',@lines ;
$xml =~ s/\n//g ; # remove all returns
my $index = 0 ;
my @results ;
my %features ;
my $key = 0 ;
while ((substr $xml, $index) =~ m/<feature (.*?)<\/feature>/) {
my $feature = $1 ;
$index += $-[0] + 1 ;
$key++ ;
my $type = ''; $type = $1 if ($feature =~ m/type=\"(.*?)\"/) ;
my $description = ''; $description = $1 if ($feature =~ m/description=\"(.*?)\"/) ;
my $beginpos = ''; $beginpos = $1 if ($feature =~ m/begin position=\"(.*?)\"/) ;
my $endpos = ''; $endpos = $1 if ($feature =~ m/end position=\"(.*?)\"/) ;
my $position = ''; $position = $1 if ($feature =~ m/position position=\"(.*?)\"/) ;
push @results, "$type $description $position $beginpos $endpos";
$features{$key}{type} = $type ;
$features{$key}{description} = $description ;
$features{$key}{position} = $position ;
$features{$key}{beginpos} = $beginpos ;
$features{$key}{endpos} = $endpos ;
}
return %features ;
}
sub get_uniprot {
my $uniprot = shift ;
my $url = "http://www.uniprot.org/uniprot/$uniprot.xml" ;
my $xml = get($url);
$xml =~ s/\n//g ; # remove all returns
my $index = 0 ;
my @results ;
my %features ;
my $key = 0 ;
while ((substr $xml, $index) =~ m/<feature (.*?)<\/feature>/) {
my $feature = $1 ;
$index += $-[0] + 1 ;
$key++ ;
my $type = ''; $type = $1 if ($feature =~ m/type=\"(.*?)\"/) ;
my $description = ''; $description = $1 if ($feature =~ m/description=\"(.*?)\"/) ;
my $beginpos = ''; $beginpos = $1.' -' if ($feature =~ m/begin position=\"(.*?)\"/) ;
my $endpos = ''; $endpos = $1 if ($feature =~ m/end position=\"(.*?)\"/) ;
my $position = ''; $position = $1 if ($feature =~ m/position position=\"(.*?)\"/) ;
push @results, "$type $description $position $beginpos $endpos";
$features{$key}{type} = $type ;
$features{$key}{description} = $description ;
$features{$key}{position} = $position ;
$features{$key}{beginpos} = $beginpos ;
$features{$key}{endpos} = $endpos ;
}
return %features ;
}
## the mandatory one (without it no package!!!)
1