forked from CNXTEoEorg/fortune-grabber
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathglobal500.py
97 lines (81 loc) · 3.56 KB
/
global500.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
# Fortune Global 500 grabber: downloads and parses company information.
# Import libraries
# Works the same way as fortune1000.py, except that the eps and mktvalue
# fields are missing for some companies, so they are skipped here.
# Last checked: 8 September 2017
from json import JSONDecodeError
import urllib.request
import json
import csv
class Company:
    """Plain record holding the fields collected for one listed company.

    Every field is declared as a class-level default, so a new instance
    reads these values until the attribute is assigned on the instance.
    """

    # Identity and position on the list.
    fullname = ticker = ceo = website = ""
    ranking = fortune500rank = yearsonlist = 0

    # Classification.
    industry = sector = ""

    # Headquarters.
    hqlocation = hqaddr = hqstate = hqzip = ""

    # Size and financial figures; the two "change" fields default to floats.
    employees = revenues = profits = assets = 0
    revchange = prftchange = 0.0
# (Company attribute, key inside an item's "meta" dict), in the order the
# fields are read. Only "fortune500-rank" is spelled differently from its
# attribute name.
_META_FIELDS = (
    ("ranking", "ranking"),
    ("fullname", "fullname"),
    ("ticker", "ticker"),
    ("industry", "industry"),
    ("sector", "sector"),
    ("hqlocation", "hqlocation"),
    ("hqaddr", "hqaddr"),
    ("yearsonlist", "yearsonlist"),
    ("ceo", "ceo"),
    ("employees", "employees"),
    ("revenues", "revenues"),
    ("profits", "profits"),
    ("hqzip", "hqzip"),
    ("website", "website"),
    ("fortune500rank", "fortune500-rank"),
    ("revchange", "revchange"),
    ("prftchange", "prftchange"),
    ("hqstate", "hqstate"),
    ("assets", "assets"),
)


def _company_from_meta(meta):
    """Build a Company from one list item's "meta" mapping.

    Each field is looked up on its own, so a missing key leaves only that
    attribute at its Company default instead of also discarding every field
    that comes after it.

    Returns a ``(company, complete)`` tuple; ``complete`` is False when at
    least one expected key was absent from ``meta``.
    """
    company = Company()
    complete = True
    for attr, key in _META_FIELDS:
        try:
            value = meta[key]
        except KeyError:
            complete = False
            continue
        if attr == "ticker" and isinstance(value, str):
            # Tickers are stored upper-cased. A non-string value (for
            # example a JSON null) is kept as-is rather than raising.
            value = value.upper()
        setattr(company, attr, value)
    return company, complete


def grab():
    """Download the list page by page and return the parsed companies.

    Requests offsets 0, 50, ..., 450 from the fortune.com list API (the
    trailing number in the URL is presumably the page size - confirm against
    the API) and turns every entry of each page's "list-items" into a
    Company. Progress and problems are reported with print().

    Returns:
        list of Company, in the order the items were received.

    Raises:
        Errors from urllib.request.urlopen, and a KeyError when a page has
        no "list-items" key, are not caught here. A page whose body is not
        valid JSON is reported and skipped.
    """
    companies = []
    page_size = 50
    url_template = (
        "http://fortune.com/api/v2/list/2082743/expand/item/ranking/asc/"
        "{postid}/{count}/"
    )

    for offset in range(0, 500, page_size):
        page_url = url_template.format(postid=offset, count=page_size)
        try:
            # json.load() no longer accepts an "encoding" keyword
            # (Python 3.9+); it decodes the UTF-8 bytes of the response
            # itself. The with-block closes the HTTP response.
            with urllib.request.urlopen(page_url) as response:
                page_data = json.load(response)
        except JSONDecodeError:
            print(str(offset) + " to " + str(offset + page_size) + " has JSONDecodeError.")
            continue

        for item in page_data["list-items"]:
            company, complete = _company_from_meta(item.get("meta", {}))
            if not complete:
                print("Keyerror has occurred for " + str(company.fullname))
            print("{}. {} ; {}".format(company.ranking, company.fullname, company.ceo))
            companies.append(company)

    return companies
# Obtain companies
companies = grab()

# Saving to CSV.
# newline="" is what the csv module expects of the file object; without it
# every row is followed by a blank line on Windows. The encoding is set
# explicitly so non-ASCII company and CEO names are written the same way
# regardless of the platform's default locale.
with open("global500.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # Header row; the column order matches the data rows written below.
    writer.writerow([
        "ranking", "full name", "ticker", "industry", "sector",
        "hq location", "hq address", "years on list", "ceo",
        "employees", "revenues", "profits", "hq zip", "website",
        "fortune 500 rank", "revenue change", "profit change",
        "assets", "hq state",
    ])
    for company in companies:
        writer.writerow([
            company.ranking, company.fullname, company.ticker,
            company.industry, company.sector, company.hqlocation,
            company.hqaddr, company.yearsonlist, company.ceo,
            company.employees, company.revenues, company.profits,
            company.hqzip, company.website, company.fortune500rank,
            company.revchange, company.prftchange, company.assets,
            company.hqstate,
        ])