-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
generate.py
87 lines (71 loc) · 2.8 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Generates family lists and specimen aliases from a list of specimens
# (given by common or scientific name) by querying the iNaturalist API.
_API_ROOT = "https://api.inaturalist.org/v1"
# Species search endpoint; `{}` is filled in with the specimen name.
SEARCH_URL = _API_ROOT + "/taxa?q={}&is_active=true&rank=species&all_names=true"
# Taxon lookup endpoint; `{}` is filled in with comma-separated taxon ids.
TAXON_URL = _API_ROOT + "/taxa/{}"
import os
import time
from typing import Dict, List
from urllib.parse import quote

import requests
# HTTP headers sent with every API request.
headers = {
    "User-Agent": "SciOlyID",
}

# Seconds to pause between API requests.
REQUEST_DELAY = 1
# Seconds to wait for a response before giving up on a request.
REQUEST_TIMEOUT = 30

# Accumulated results. Every key is a lower-cased name.
wikipedia: Dict[str, str] = {}  # specimen name -> Wikipedia URL ("" if none)
scinames: Dict[str, str] = {}  # specimen name -> scientific name
aliases: Dict[str, List[str]] = {}  # specimen name -> alternative names
families: Dict[str, List[str]] = {}  # family name -> specimen names in it
family_alias: Dict[str, str] = {}  # family name -> family common name
family_ids: Dict[int, str] = {}  # taxon id -> family name, for families already seen


def read_specimens(path: str = "list.txt") -> tuple:
    """Return the sorted, de-duplicated specimen names listed in *path*.

    Each line is one specimen. Surrounding whitespace is stripped before
    de-duplication and blank lines are ignored, so a trailing newline or an
    empty line never turns into a bogus search query.
    """
    with open(path, "r", encoding="utf-8") as f:
        return tuple(sorted({line.strip() for line in f if line.strip()}))


def collect_aliases(names: List[dict], preferred: str) -> List[str]:
    """Return the lower-cased English and scientific names other than *preferred*.

    *names* is a list of dicts with "name" and "locale" keys. The comparison
    with *preferred* is case-insensitive, and repeated names are emitted only
    once, in first-seen order.
    """
    seen = {preferred.lower()}
    result = []
    for entry in names:
        if entry.get("locale") not in ("sci", "en"):
            continue
        alias = entry["name"].lower()
        if alias not in seen:
            seen.add(alias)
            result.append(alias)
    return result


def assign_family(name: str, ancestor_ids: List[int]) -> None:
    """Record specimen *name* under its family, fetching the family if unknown.

    If one of *ancestor_ids* is a family that was already seen, the specimen
    is appended to that family. Otherwise the ancestors are looked up through
    TAXON_URL and every taxon of rank "family" in the response is registered
    in `family_ids`, `family_alias` and `families`.

    Raises whatever `requests` raises on a failed or non-2xx request, and
    KeyError if a returned taxon lacks an expected field.
    """
    known = set(ancestor_ids).intersection(family_ids)
    if known:
        members = families[family_ids[known.pop()]]
        if name not in members:
            members.append(name)
        return

    if not ancestor_ids:
        # An empty id list would turn the lookup URL into a different query.
        print("no ancestors for:", name)
        return

    print("fetching taxon")
    time.sleep(REQUEST_DELAY)
    response = requests.get(
        TAXON_URL.format(",".join(map(str, ancestor_ids))),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    found = False
    for taxon in response.json()["results"]:
        if taxon["rank"] != "family":
            continue
        found = True
        family_name = taxon["name"].lower()
        print("found:", family_name)
        # Not every family has a common name; fall back to the family name.
        family_alias[family_name] = (
            taxon.get("preferred_common_name") or family_name
        ).lower()
        family_ids[taxon["id"]] = family_name
        members = families.setdefault(family_name, [])
        if name not in members:
            members.append(name)
    if not found:
        print("no family found for:", name)


def process_specimen(specimen: str) -> None:
    """Search for *specimen* and record the first matching species.

    Fills `wikipedia`, `aliases` and `scinames` for the match and then files
    it under its family. A search without results is reported and skipped.

    Raises whatever `requests` raises on a failed or non-2xx request, and
    KeyError if the response lacks an expected field.
    """
    response = requests.get(
        # Percent-encode the name so characters such as "&" or "#" cannot
        # alter the query string.
        SEARCH_URL.format(quote(specimen, safe="")),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    results = response.json()["results"]
    if not results:
        print("no results for:", specimen)
        return

    returned = results[0]
    # Use the scientific name when the taxon has no common name.
    name = (returned.get("preferred_common_name") or returned["name"]).lower()
    wikipedia[name] = returned.get("wikipedia_url") or ""
    aliases[name] = collect_aliases(returned.get("names") or [], name)
    scinames[name] = returned["name"]
    assign_family(name, returned.get("ancestor_ids") or [])


def write_outputs() -> None:
    """Write everything collected so far to the files under ``data/``."""
    # Create the output directories so a fresh checkout does not lose the
    # fetched data to a missing-directory error.
    os.makedirs("data/taxon", exist_ok=True)
    with open("data/scinames.txt", "w", encoding="utf-8") as f:
        for specimen, sciname in scinames.items():
            f.write(f"{specimen},{sciname}\n")
    with open("data/wikipedia.txt", "w", encoding="utf-8") as f:
        for specimen, url in wikipedia.items():
            f.write(f"{specimen},{url}\n")
    with open("data/family_aliases.txt", "w", encoding="utf-8") as f:
        for family, alias in family_alias.items():
            f.write(f'"{family}":["{alias}"],\n')
    for family, specimens in families.items():
        with open(f"data/taxon/{family.lower()}.txt", "w", encoding="utf-8") as f:
            for specimen in specimens:
                f.write(f"{specimen},{','.join(aliases.get(specimen, []))}\n")


def main() -> None:
    """Look up every specimen in ``list.txt`` and write the data files."""
    data = read_specimens()
    try:
        for specimen in data:
            print("specimen:", specimen)
            try:
                process_specimen(specimen)
            except Exception as e:
                # Best effort: report the failure and carry on, so one bad
                # specimen does not discard the rest of the list.
                print(f"skipping {specimen!r}: {e!r}")
            time.sleep(REQUEST_DELAY)
    finally:
        # Runs on interruption too, so partial results are still saved.
        write_outputs()


if __name__ == "__main__":
    main()