forked from CNXTEoEorg/fortune-grabber
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfortune1000.py
98 lines (83 loc) · 3.7 KB
/
fortune1000.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# -*- coding: utf-8 -*-
# Fortune 1000 grabber. Download and parse companies information
# Import libraries
# The original code targeted Python 2.7; this version was written for Python 3.6
# and works with the current website. It also collects a little more metadata.
# Last checked: 8th September, 2017
from json import JSONDecodeError
import urllib.request
import json
import csv
class Company:
    """Plain record holding the data collected for one list entry.

    Every field below is a class-level default.  ``grab()`` creates an
    instance per entry and overwrites each attribute with the value of the
    like-named key of the entry's ``"meta"`` mapping (``global500rank`` is
    read from the ``"global500-rank"`` key).  A field that could not be read
    keeps the default shown here.

    NOTE(review): the defaults suggest numeric vs. text fields, but the
    values assigned by ``grab()`` are stored exactly as the API returns
    them -- confirm the actual types before doing arithmetic on them.
    """

    # Identification
    ranking = 0
    fullname = ""
    ticker = ""
    industry = ""
    sector = ""

    # Headquarters and contact details
    hqlocation = ""
    hqaddr = ""
    website = ""

    # List history and leadership
    yearsonlist = 0
    ceo = ""

    # Figures
    eps = 0
    employees = 0
    mktval = 0
    global500rank = 0
    revenues = 0
    revchange = 0.0
    profits = 0
    prftchange = 0.0

    # Remaining headquarters fields
    hqstate = ""
    hqzip = ""
def grab():
    """Download the list pages and return their entries as ``Company`` objects.

    Ten requests are made against the fortune.com list API, one for each
    100-step offset ``0, 100, ..., 900``.  Every element of a page's
    ``"list-items"`` array becomes one ``Company`` whose attributes are
    copied from the element's ``"meta"`` mapping.  One summary line per
    company is printed to stdout as it is processed.

    Error handling is best effort:

    * A page whose body is not valid JSON is reported on stdout and skipped.
    * An entry lacking an expected key is reported on stdout and is still
      returned; the fields that come after the missing one keep the
      ``Company`` class defaults.

    Returns:
        list: the ``Company`` objects, in the order they were received.

    Raises:
        urllib.error.URLError: network/HTTP failures from ``urlopen`` are not
            caught here and abort the whole run.
        KeyError: if a decoded page has no ``"list-items"`` key.
    """
    companies = []

    # Fetch the pages one by one and process their JSON payloads.
    for i in range(0, 1000, 100):
        page_url = (
            "http://fortune.com/api/v2/list/2013055/expand/item/ranking/asc/"
            "{postid}/{index}/"
        ).format(postid=str(i), index=str(i + 100))

        try:
            # The response is closed as soon as it has been parsed.
            # json.load() is called without ``encoding=``: that keyword was
            # ignored on older Python 3 releases and raises TypeError since
            # Python 3.9.  The payload's encoding is detected by json itself.
            with urllib.request.urlopen(page_url) as response:
                page_data = json.load(response)
        except JSONDecodeError:
            print(str(i) + " to " + str(i + 100) + " has JSONDecodeError.")
            continue

        for item in page_data["list-items"]:
            company = Company()
            try:
                meta = item["meta"]
                company.ranking = meta["ranking"]
                company.fullname = meta["fullname"]
                company.ticker = meta["ticker"].upper()
                company.industry = meta["industry"]
                company.sector = meta["sector"]
                company.hqlocation = meta["hqlocation"]
                company.hqaddr = meta["hqaddr"]
                company.yearsonlist = meta["yearsonlist"]
                company.ceo = meta["ceo"]
                company.eps = meta["eps"]
                company.employees = meta["employees"]
                company.revenues = meta["revenues"]
                company.profits = meta["profits"]
                company.hqzip = meta["hqzip"]
                company.website = meta["website"]
                company.mktval = meta["mktval"]
                company.global500rank = meta["global500-rank"]
                company.revchange = meta["revchange"]
                company.prftchange = meta["prftchange"]
                company.hqstate = meta["hqstate"]
            except KeyError:
                # Keep the partially filled record rather than dropping it.
                print("Keyerror has occurered for " + str(company.fullname))

            print(str(company.ranking) + ". " + str(company.fullname) + " ; " + str(company.ceo))
            companies.append(company)

    return companies
# Obtain companies
companies = grab()

# Write one CSV row per company, preceded by a header row.
# - newline="" is what the csv module requires of the file object; without it
#   extra blank rows appear on platforms that translate "\n" to "\r\n".
# - encoding="utf-8" makes the output independent of the platform's default
#   encoding, so names with non-ASCII characters are always writable.
# - The "output" directory must already exist; open() does not create it.
with open("output/fortune1000.csv", "wt", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow([
        "ranking", "full name", "ticker", "industry", "sector",
        "hq location", "hq address", "years on list", "ceo", "eps",
        "employees", "revenues", "profits", "hq zip", "website",
        "market value", "global 500 rank", "revenue change",
        "profit change", "hq state",
    ])
    # Column order here must stay in step with the header row above.
    for company in companies:
        writer.writerow([
            company.ranking, company.fullname, company.ticker,
            company.industry, company.sector, company.hqlocation,
            company.hqaddr, company.yearsonlist, company.ceo, company.eps,
            company.employees, company.revenues, company.profits,
            company.hqzip, company.website, company.mktval,
            company.global500rank, company.revchange, company.prftchange,
            company.hqstate,
        ])