-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper.py
81 lines (70 loc) · 1.81 KB
/
scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv, json
def get_data(offset=0):
    """Scrape one page of the sofifa.com player listing.

    Args:
        offset: Value sent as the ``offset`` query parameter of the listing
            URL. Defaults to 0.
            NOTE(review): presumably a row offset into the listing rather
            than a page index -- confirm the step size against the site.

    Returns:
        A list with one entry per ``tbody tr`` row of the page. Each entry
        is a list of text values in the order: id, name, age, positions
        (joined with ", "), nationality, overall, potential, club, value,
        wage.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://sofifa.com/players?type=all&tm%5B0%5D=243&offset={}".format(
        offset
    )
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() stops an error page from being parsed
    # as if it were an (empty) result table.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    rv = []
    for row in soup.select("tbody tr"):
        id_ = row.select_one("img[id]")["id"]
        name = row.select_one(".col-name").get_text(strip=True)
        age = row.select_one(".col-ae").get_text(strip=True)
        positions = [p.get_text(strip=True) for p in row.select("span.pos")]
        nationality = row.select_one("img.flag")["title"]
        overall = row.select_one(".col-oa").get_text(strip=True)
        potential = row.select_one(".col-pt").get_text(strip=True)

        # Look the link up once; it is needed for both the text and the
        # fallback title attribute.
        club_link = row.select_one(".col-name > div > a")
        club = club_link.get_text(strip=True)
        # sometimes there isn't any club, just country:
        if club == "":
            club = club_link["title"]

        value = row.select_one(".col-vl").get_text(strip=True)
        wage = row.select_one(".col-wg").get_text(strip=True)
        rv.append(
            [
                id_,
                name,
                age,
                ", ".join(positions),
                nationality,
                overall,
                potential,
                club,
                value,
                wage,
            ]
        )
    return rv


# Collect the rows of every requested offset into one flat list.
all_data = []
for offset in range(0, 1):  # <--- increase offset here
    print("Offset {}...".format(offset))
    # Pass the offset through so each iteration fetches a different page
    # instead of re-downloading the first one.
    all_data.extend(get_data(offset))
# Column headers, listed in the same order as the fields of each row in
# all_data.
column_names = [
    "ID",
    "Name",
    "Age",
    "Positions",
    "Nationality",
    "Overall",
    "Potential",
    "Club",
    "Value",
    "Wage",
]

# Build the table, show it on stdout, and save it without the index column.
df = pd.DataFrame(data=all_data, columns=column_names)
print(df)
df.to_csv("data.csv", index=False)
# Now convert to json: re-read the CSV and index every record by its "ID"
# column.
csvFilePath = 'data.csv'
jsonFilePath = 'data.json'
data = {}
# pandas writes the CSV as UTF-8 by default, so read it back with the same
# encoding instead of the platform default (which would mangle non-ASCII
# names on some systems). newline="" is what the csv module requires so that
# quoted fields containing line breaks are parsed correctly.
with open(csvFilePath, encoding="utf-8", newline="") as csvFile:
    csvReader = csv.DictReader(csvFile)
    for row in csvReader:
        # Later rows with a duplicate ID overwrite earlier ones.
        data[row['ID']] = row
# create new json file and write data to it
with open(jsonFilePath, 'w', encoding="utf-8") as jsonFile:
    jsonFile.write(json.dumps(data, indent=2))