forked from ChrisMusson/FBRef_DB
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
67 lines (55 loc) · 2.21 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import os
import sqlite3
import time
from create_database import create_database
from matches import update_matches
from players import insert_players
from update_local import update_local
from utils import get_matches_in_database
def main(database_file, competitions=("Premier_League",), seasons=("2021-2022",)):
    """Bring the SQLite database up to date for the given competitions/seasons.

    For every competition/season pair that ``db_helper.json`` marks as
    covered, the local files and the match table are refreshed, and player
    data is inserted for every entry found under
    ``web_pages/<competition>/<season>`` that is not already in the database.
    Changes are committed after each pair.

    Args:
        database_file: Path passed to ``sqlite3.connect``.
        competitions: Iterable of competition keys. Each key must exist under
            ``"competitions"`` in ``db_helper.json``; an unknown key raises
            ``KeyError``.
        seasons: Re-iterable collection of season strings such as
            ``"2021-2022"``. Only the part before the first ``-`` is used
            (as an int) for the coverage check.
    """
    # A pair that adds at most this many matches triggers a pause afterwards.
    small_batch_limit = 20
    # Original author's note: after lots of testing, 7 seconds is the sleep
    # length required to not get put in "FBRef jail". Annoying and slow, but
    # required.
    pause_seconds = 7

    connection = sqlite3.connect(database_file)
    try:
        cursor = connection.cursor()
        create_database(cursor)

        # Parsed once per call instead of once per competition/season pair.
        # NOTE(review): assumes none of the helpers called below rewrite
        # db_helper.json while this function is running - confirm.
        with open("db_helper.json", "r") as f:
            competition_info = json.load(f)["competitions"]

        for competition in competitions:
            comp = competition_info[competition]

            for season in seasons:
                season_start = int(season.split("-")[0])
                # Skip seasons outside the [start_year, end_year] range that
                # db_helper.json records for this competition.
                if not comp["start_year"] <= season_start <= comp["end_year"]:
                    print(
                        f"xG data doesn't exist for the {season} {competition} season"
                    )
                    continue

                update_local(competition, season)
                update_matches(cursor, competition, season)

                # Whatever is on disk but not yet in the database still needs
                # its player data inserted.
                # NOTE(review): the set difference requires
                # get_matches_in_database to return a set of the same names
                # os.listdir yields - confirm against utils.
                db_matches = get_matches_in_database(cursor, competition, season)
                local_matches = set(
                    os.listdir(os.path.join("web_pages", competition, season))
                )
                matches_to_add = local_matches - db_matches

                insert_players(cursor, competition, season, list(matches_to_add))
                connection.commit()

                if len(matches_to_add) <= small_batch_limit:
                    print(
                        "Didn't need to add many files to the database. Sleeping to avoid rate limit"
                    )
                    time.sleep(pause_seconds)
    finally:
        # Release the database handle even when a step above raises; work
        # from the failing pair that was not committed is not saved.
        connection.close()
if __name__ == "__main__":
    # Competitions to process, in this order.
    leagues = [
        "Premier_League",
        "Bundesliga",
        "La_Liga",
        "Ligue_1",
        "Serie_A",
        "Primeira_Liga",
    ]

    # Only the uncommented seasons are processed; uncomment earlier ones to
    # include them in the run.
    active_seasons = [
        # "2017-2018",
        # "2018-2019",
        # "2019-2020",
        # "2020-2021",
        # "2021-2022",
        # "2022-2023",
        # "2023-2024",
        "2024-2025",
    ]

    main("master.db", competitions=leagues, seasons=active_seasons)