-
Notifications
You must be signed in to change notification settings - Fork 6
/
taxa.py
68 lines (51 loc) · 2.07 KB
/
taxa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import shutil
import requests
from utilities import decorate
from ete3 import NCBITaxa
# SQLite taxonomy database file handed to ete3's NCBITaxa as ``dbfile``.
DEFAULT_TAXADB = "./.etetoolkit/taxa.sqlite"
# Local path where setup() stores the downloaded NCBI taxdump archive.
TAXA_DUMP = "./taxdump.tar.gz"
# Flipped to True by setup() once it has run, so later calls do nothing.
__is_setup__ = False
def setup():
    """Download the NCBI taxdump and build the local taxonomy database once.

    The work is skipped when it has already run in this process (tracked by
    the module-level ``__is_setup__`` flag) or when ``DEFAULT_TAXADB`` already
    exists on disk. Otherwise the taxdump archive is streamed to ``TAXA_DUMP``
    and ``NCBITaxa`` is instantiated with it to build the database.

    Raises:
        requests.HTTPError: if the server answers with an error status. The
            flag is left unset in that case, so a later call retries.
    """
    global __is_setup__
    if __is_setup__:
        return

    if not os.path.exists(DEFAULT_TAXADB):
        # The context manager releases the streamed connection even when the
        # copy fails; the timeout keeps a stalled server from hanging forever.
        with requests.get(
            'http://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz',
            stream=True,
            timeout=60,
        ) as response:
            # Fail loudly instead of saving an HTML error page as the archive.
            response.raise_for_status()
            with open(TAXA_DUMP, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
        # Instantiating with a taxdump file makes ete3 build the SQLite db.
        NCBITaxa(dbfile=DEFAULT_TAXADB, taxdump_file=TAXA_DUMP)

    __is_setup__ = True
# Decorator factory built from setup(); applied below as ``@ncbi_taxa()``.
# NOTE(review): presumably decorate() makes each wrapped function run setup()
# before its own body — confirm against utilities.decorate.
ncbi_taxa = decorate(setup)
@ncbi_taxa()
def get_taxa_info() -> NCBITaxa:
    """Open the local taxonomy database and return the NCBITaxa handle.

    When ``DEFAULT_TAXADB`` is already on disk no taxdump file is passed;
    otherwise ``TAXA_DUMP`` is supplied as the taxdump file.
    """
    if os.path.exists(DEFAULT_TAXADB):
        dump_source = None
    else:
        dump_source = TAXA_DUMP
    return NCBITaxa(dbfile=DEFAULT_TAXADB, taxdump_file=dump_source)
# Warning: the fuzzy name lookup below only works after the SQLite Levenshtein
# extension has been built: inside the venv folder, go to the
# lib/python3.6/site-packages/ete3/ncbi_taxonomy/SQLite-Levenshtein sub directory and run "make".
@ncbi_taxa()
def scientific_name_to_txid(name):
    """Return the first element of the fuzzy name match for ``name``.

    The lookup is delegated to ``NCBITaxa.get_fuzzy_name_translation``; the
    taxid is taken from position 0 of its result.
    """
    fuzzy_match = get_taxa_info().get_fuzzy_name_translation(name)
    return fuzzy_match[0]
@ncbi_taxa()
def get_name_for_rank(txid, rank, default="no_name_found"):
    """Return the name of the taxon at ``rank`` in the lineage of ``txid``.

    Args:
        txid: Taxonomy id whose lineage is inspected.
        rank: Rank label to look for in that lineage (e.g. ``"family"``).
        default: Value returned when the lineage has no entry at ``rank`` or
            when any lookup step fails.

    Returns:
        The translated name of the lineage member at ``rank``, or ``default``.
    """
    try:
        taxa = get_taxa_info()
        lineage_ranks = taxa.get_rank(taxa.get_lineage(txid))
        # Invert taxid -> rank into rank -> taxid; for a rank label that
        # occurs more than once the last taxid wins.
        taxid_by_rank = {
            rank_name: taxid for taxid, rank_name in lineage_ranks.items()
        }
        # Return early rather than handing ``default`` to the translator as
        # if it were a taxid.
        if rank not in taxid_by_rank:
            return default
        rank_txid = taxid_by_rank[rank]
        return taxa.get_taxid_translator([rank_txid])[rank_txid]
    except Exception:
        # Deliberate best effort: unknown taxids or database errors yield the
        # default, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return default
@ncbi_taxa()
def get_lineage_info(txid):
    """Map a fixed set of ranks to their names in the lineage of ``txid``.

    Each rank, from ``"order"`` down to ``"species"``, is resolved with
    ``get_name_for_rank`` using its default placeholder for missing ranks.
    """
    rank_names = (
        "order",
        "suborder",
        "infraorder",
        "parvorder",
        "superfamily",
        "family",
        "subfamily",
        "genus",
        "species",
    )
    return {
        rank_name: get_name_for_rank(txid, rank_name)
        for rank_name in rank_names
    }