main.py
from scraper import GithubProfileScraper
from utils import write_json
from os.path import isfile
from random import randint
# path to a file containing one GitHub username per line
username_file = 'usernames.txt'
proxy_file = 'proxy.json'

# read the usernames file, skipping blank lines
assert isfile(username_file), f'{username_file} not found'
with open(username_file, 'r') as f:
    usernames = [line.strip() for line in f if line.strip()]
# scrape data for 5-10 random users
# base_num = randint(1, 2860)
# offset = randint(5, 10)
# usernames = usernames[base_num:base_num+offset]
# create the scraper (5 worker threads, proxies loaded from proxy.json)
scraper = GithubProfileScraper(
    max_threads=5,
    proxy_json_file=proxy_file
)
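# NOTE (assumption, not confirmed by this file): proxy.json presumably holds the
# proxy pool the scraper rotates through, e.g. a JSON list of proxy URLs such as
# ["http://user:pass@host:port", ...].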
# scrape data
data = dict()
for username in usernames:
    user_details = scraper.scrape_user_data(username)
    data[username] = user_details
    print('-' * 40)
# write data to a file
write_json('scraped_data.json', data)
print(f'data scraped for {len(usernames)} users')
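write_json comes from the project's utils module, which is not shown on this page. A minimal sketch of such a helper, assuming it is just a thin wrapper around json.dump, could look like this:

# utils.py (hypothetical sketch; the actual helper lives in this repo's utils module)
import json

def write_json(file_path, data):
    # serialize the scraped data dict to pretty-printed JSON on disk
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)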