forked from owid/covid-19-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbulgaria.py
69 lines (47 loc) · 2.04 KB
/
bulgaria.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import re
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup

from cowidev.utils.clean.dates import localdate
from cowidev.utils.web import get_soup
from cowidev.vax.utils.incremental import enrich_data, increment
def read(source: str) -> pd.Series:
    """Download the page at ``source`` and parse the vaccination figures from it.

    Args:
        source: URL handed to ``get_soup``.

    Returns:
        Whatever ``parse_data`` extracts from the fetched page.
    """
    return parse_data(get_soup(source))
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Extract the "Общо" (total) row of the vaccination table as a Series.

    The table is located by finding the ``<p>`` element whose text matches
    "Поставени ваксини по" and taking the first ``<table>`` under that
    paragraph's parent element.

    Args:
        soup: Parsed HTML of the statistics page.

    Returns:
        The single row whose "Област" column equals "Общо", with the table's
        first column used as index before transposing and squeezing, so the
        result is indexed by the remaining column labels.

    Raises:
        ValueError: If the heading paragraph or its table is missing, or if
            the table does not contain exactly one "Общо" row.
    """
    heading = soup.find("p", string=re.compile("Поставени ваксини по"))
    if heading is None:
        raise ValueError("Could not find the vaccination table heading on the page")

    table = heading.parent.find("table")
    if table is None:
        raise ValueError("Could not find a table next to the vaccination heading")

    # Recent pandas deprecates passing literal HTML to read_html; a file-like
    # wrapper is accepted by every version.
    data = pd.read_html(StringIO(str(table)))[0]
    # The header has two levels; only the inner one carries the column names
    # used below.
    data = data.droplevel(level=0, axis=1)
    data = data[data["Област"] == "Общо"]
    if len(data) != 1:
        # With zero or several rows, squeeze() would hand back a DataFrame
        # rather than the Series the caller expects.
        raise ValueError(f"Expected exactly one 'Общо' row in the table, found {len(data)}")
    return data.set_index(data.columns[0]).T.squeeze()
def enrich_date(ds: pd.Series) -> pd.Series:
    """Attach a "date" entry to ``ds`` via ``enrich_data``.

    The value is whatever ``localdate`` returns for the "Europe/Sofia"
    timezone.
    """
    return enrich_data(ds, "date", localdate("Europe/Sofia"))
def translate_index(ds: pd.Series) -> pd.Series:
    """Rename the Bulgarian index labels to the metric names used downstream.

    Labels that are not listed in the mapping are left unchanged.
    """
    label_map = {
        "Общ брой лица със завършен ваксинационен цикъл": "people_fully_vaccinated",
        "Общо поставени дози": "total_vaccinations",
    }
    return ds.rename(index=label_map)
def enrich_location(ds: pd.Series) -> pd.Series:
    """Attach the fixed "location" value "Bulgaria" to ``ds`` via ``enrich_data``."""
    country = "Bulgaria"
    return enrich_data(ds, "location", country)
def enrich_vaccine(ds: pd.Series) -> pd.Series:
    """Attach the fixed, comma-separated "vaccine" list to ``ds`` via ``enrich_data``."""
    vaccines = "Johnson&Johnson, Oxford/AstraZeneca, Moderna, Pfizer/BioNTech"
    return enrich_data(ds, "vaccine", vaccines)
def enrich_source(ds: pd.Series) -> pd.Series:
    """Attach the statistics page URL as "source_url" to ``ds`` via ``enrich_data``."""
    url = "https://coronavirus.bg/bg/statistika"
    return enrich_data(ds, "source_url", url)
def pipeline(ds: pd.Series) -> pd.Series:
    """Run every processing step on ``ds`` in order and return the result.

    The steps are: translate the index labels, then add the date, location,
    vaccine and source entries.
    """
    steps = (
        translate_index,
        enrich_date,
        enrich_location,
        enrich_vaccine,
        enrich_source,
    )
    for step in steps:
        ds = ds.pipe(step)
    return ds
def main(paths):
    """Scrape the statistics page and hand the latest figures to ``increment``.

    Args:
        paths: Forwarded unchanged to ``increment`` as its ``paths`` argument.
    """
    url = "https://coronavirus.bg/bg/statistika"
    record = read(url).pipe(pipeline)

    # The two counters are converted to plain ints before being passed on.
    fields = {
        "location": record["location"],
        "total_vaccinations": int(record["total_vaccinations"]),
        "people_fully_vaccinated": int(record["people_fully_vaccinated"]),
        "date": record["date"],
        "source_url": record["source_url"],
        "vaccine": record["vaccine"],
    }
    increment(paths=paths, **fields)
# Script entry point.
# NOTE(review): `main` is declared with a required `paths` parameter but is
# called here with no arguments, so running this file directly raises
# TypeError. The correct `paths` value is not visible in this file; confirm
# how the project supplies it and pass it in.
if __name__ == "__main__":
    main()