-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathexport.py
36 lines (31 loc) · 1.07 KB
/
export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gzip
import json
import os
from datetime import datetime

from tqdm import tqdm
# get name of file
export_name = __file__.replace('.py', '.jsonl')
country_geonameid = "g-2802361"
with open(export_name, 'w') as json_file:
with gzip.open("gfm.jsonl.gz", 'rb') as f:
for line in tqdm(f):
line = line.strip()
tweet = json.loads(line)
if 'locations' not in tweet:
continue
locations = tweet['locations']
for location in locations:
if location['level_0_region'] == country_geonameid:
tweet = json.dumps(tweet, default=str)
json_file.write(tweet + '\n')
break
# query = es.build_date_query(
# start=datetime(2014, 1, 1),
# end=datetime.utcnow(),
# filter_within_countries=country_geonameid
# )
# print(query)
# with open(export_name, 'w') as f:
# for tweet in es.scroll_through(index='gfm', body=query):
# tweet = tweet['_source']
# tweets = json.dumps(tweet, default=str)
# f.write(tweets + '\n')