24
24
from .ensembl_versions import check_release_number
25
25
26
26
# Base URL of the main Ensembl download server.
ENSEMBL_FTP_SERVER = "https://ftp.ensembl.org"

# Base URL of the server used for plant species.
# No trailing slash: every sub-directory template below starts with "/",
# so a trailing slash here would produce ".../​/pub/..." style URLs.
ENSEMBL_PLANTS_FTP_SERVER = "https://ftp.ensemblgenomes.ebi.ac.uk"

# Example directories
# FASTA files: /pub/release-78/fasta/homo_sapiens/
# GTF annotation files: /pub/release-78/gtf/homo_sapiens/
FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/"
PLANTS_FASTA_SUBDIR_TEMPLATE = (
    "/pub/release-%(release)d/plants/fasta/%(species)s/%(type)s/"
)
GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/gtf/%(species)s/"
PLANTS_GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/plants/gtf/%(species)s/"

# Plant species names that have been added so far; these are the species
# for which the plant-specific server and sub-directory templates apply.
lPlants = ("arabidopsis_thaliana", "arabidopsis")
34
40
35
41
def normalize_release_properties (ensembl_release , species ):
36
42
"""
@@ -63,12 +69,18 @@ def make_gtf_filename(ensembl_release, species):
63
69
}
64
70
65
71
66
def make_gtf_url(
    ensembl_release,
    species,
    server=ENSEMBL_FTP_SERVER,
    gtf_subdir=GTF_SUBDIR_TEMPLATE,
):
    """
    Return the full URL of the GTF annotation file for a release/species.

    The result is a single string: server + sub-directory + filename.

    Parameters
    ----------
    ensembl_release
        Release identifier; normalized by `normalize_release_properties`.
    species
        Species to build the URL for. If it is flagged as a plant (an
        `is_plant` attribute, or a plain name listed in `lPlants`), the
        plant server and plant GTF sub-directory template replace
        `server` and `gtf_subdir`.
    server
        Base URL of the download server for non-plant species.
    gtf_subdir
        %-style template (keys `release`, `species`) for the directory
        holding the GTF file of non-plant species.
    """
    # The plant flag has to be read before normalization, because the
    # normalized `species` value is what gets formatted into the path.
    # At this point `species` is not guaranteed to expose `is_plant`
    # (e.g. when a plain name is passed), so look it up defensively and
    # fall back to the module's list of known plant names.
    is_plant = getattr(species, "is_plant", None)
    if is_plant is None:
        is_plant = species in lPlants

    if is_plant:
        server = ENSEMBL_PLANTS_FTP_SERVER
        gtf_subdir = PLANTS_GTF_SUBDIR_TEMPLATE

    ensembl_release, species, _ = normalize_release_properties(
        ensembl_release, species
    )
    subdir = gtf_subdir % {"release": ensembl_release, "species": species}
    filename = make_gtf_filename(ensembl_release=ensembl_release, species=species)
    # Sub-directory templates start with "/", so drop any trailing slash on
    # the server to avoid a "//" in the middle of the URL.
    return server.rstrip("/") + subdir + filename
74
86
@@ -93,11 +105,11 @@ def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER):
93
105
# %-style filename template for ncRNA FASTA files; takes the keys
# `Species` and `reference`.
# NOTE(review): make_fasta_filename picks the OLD_* templates when the
# release is <= 75 and the species is not a plant; presumably this template
# covers the remaining ncRNA cases -- confirm against the full function.
NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz"
94
106
95
107
96
- def make_fasta_filename (ensembl_release , species , sequence_type ):
108
+ def make_fasta_filename (ensembl_release , species , sequence_type , is_plant ):
97
109
ensembl_release , species , reference_name = normalize_release_properties (
98
110
ensembl_release , species
99
111
)
100
- if ensembl_release <= 75 :
112
+ if ensembl_release <= 75 and not is_plant :
101
113
if sequence_type == "ncrna" :
102
114
return OLD_FASTA_FILENAME_TEMPLATE_NCRNA % {
103
115
"Species" : species .capitalize (),
@@ -125,7 +137,7 @@ def make_fasta_filename(ensembl_release, species, sequence_type):
125
137
}
126
138
127
139
128
- def make_fasta_url (ensembl_release , species , sequence_type , server = ENSEMBL_FTP_SERVER ):
140
+ def make_fasta_url (ensembl_release , species , sequence_type , is_plant , server = ENSEMBL_FTP_SERVER , fasta_subdir = FASTA_SUBDIR_TEMPLATE ):
129
141
"""Construct URL to FASTA file with cDNA transcript or protein sequences
130
142
131
143
Parameter examples:
@@ -136,12 +148,17 @@ def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_S
136
148
ensembl_release , species , reference_name = normalize_release_properties (
137
149
ensembl_release , species
138
150
)
139
- subdir = FASTA_SUBDIR_TEMPLATE % {
151
+
152
+ if is_plant :
153
+ server = ENSEMBL_PLANTS_FTP_SERVER
154
+ fasta_subdir = PLANTS_FASTA_SUBDIR_TEMPLATE
155
+
156
+ subdir = fasta_subdir % {
140
157
"release" : ensembl_release ,
141
158
"species" : species ,
142
159
"type" : sequence_type ,
143
160
}
144
161
filename = make_fasta_filename (
145
- ensembl_release = ensembl_release , species = species , sequence_type = sequence_type
162
+ ensembl_release = ensembl_release , species = species , sequence_type = sequence_type , is_plant = is_plant
146
163
)
147
164
return server + subdir + filename
0 commit comments