-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathredditstats.py
67 lines (62 loc) · 2.08 KB
/
redditstats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from multiprocessing import *
import praw
from praw.handlers import MultiprocessHandler
from mongoTools import *
from pymongo import *
import datetime
import sys
import requests
def getSubredditUsers(subreddit):
    """
    Queue the recent commenters of a subreddit that are not already stored.

    Fetches up to 250 recent comments from the subreddit named by
    `subreddit`, skips every author whose name already appears among the
    'username' values returned by `allUsers`, and hands the remaining
    authors to `tempBulkInsert` as `{'user': name}` documents. Each author
    is queued at most once, no matter how many of the comments are theirs.

    Returns whatever `tempBulkInsert` returns.
    """
    client = MongoClient()
    reddit = praw.Reddit(user_agent="Subreddit Recommender", handler=MultiprocessHandler())
    comments = reddit.get_subreddit(subreddit).get_comments(limit=250)

    currentUsers = allUsers(client)
    # A set gives constant-time membership checks; it also records the names
    # queued during this call so a prolific commenter is not inserted twice.
    if currentUsers:
        seen = {user['username'] for user in currentUsers}
    else:
        seen = set()

    users = []
    for comment in comments:
        author = comment.author
        # PRAW reports the author of a deleted comment/account as None;
        # dereferencing it would abort the whole crawl with AttributeError.
        if author is None:
            continue
        name = author.name
        if name in seen:
            continue
        seen.add(name)
        users.append({'user': name})
    return tempBulkInsert(users, client)
def getComments(username):
    """
    Store the distinct subreddits that `username` has recently commented in.

    Walks up to 250 of the user's comments. The first time a subreddit's
    display name is encountered it is passed to `insertSub`; once the walk
    finishes, the ordered list of distinct names is passed to `insertUser`
    and that call's result is returned.

    If a `requests` HTTPError is raised along the way it is printed and the
    function returns None.
    """
    try:
        client = MongoClient()
        reddit = praw.Reddit(user_agent="Subreddit Recommender", handler=MultiprocessHandler())
        redditor = reddit.get_redditor(username)
        visited = []
        for comment in redditor.get_comments(limit=250):
            displayName = comment.subreddit.display_name
            if displayName in visited:
                continue
            visited.append(displayName)
            insertSub(displayName, client)
        return insertUser(username, visited, client)
    except requests.exceptions.HTTPError as exception:
        # Best effort: report the failure and move on to the next user.
        print(exception)
def getSubreddits():
    """
    Return the list of subreddit names to crawl.

    Only 'all' is listed, as a test default: it is the least specific
    subreddit, so its commenters are more likely to resemble the general
    user. Replace it with any subreddit(s) of your choice.
    """
    subreddits = ['all']
    return subreddits
def main():
    """
    Run the crawl: gather commenters per subreddit, then each user's subreddits.

    A worker pool maps `getSubredditUsers` over the names returned by
    `getSubreddits`, then maps `getComments` over the 'user' field of every
    document returned by `tempUserList`.

    On Ctrl-C the workers are terminated and the program exits via
    `sys.exit()`. On any other error the workers are terminated and the
    error is re-raised. In every case the pool is joined before returning
    so no worker processes are left behind.
    """
    # Create the pool outside the try block so the handlers below can always
    # refer to it, even if the interrupt arrives very early.
    # NOTE(review): the 6x oversubscription presumably reflects that the
    # workers are network-bound rather than CPU-bound -- confirm.
    pool = Pool(processes=(cpu_count() * 6))
    try:
        subs = getSubreddits()
        pool.map(getSubredditUsers, subs)
        users = [user['user'] for user in tempUserList(MongoClient())]
        pool.map(getComments, users)
    except KeyboardInterrupt:
        pool.terminate()
        sys.exit()
    except Exception:
        # Do not leave workers running when the crawl fails part-way.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # join() is only valid after close() or terminate(); every path above
        # has called one of them by the time this runs.
        pool.join()
# Run the crawl only when executed as a script. The guard also keeps
# multiprocessing workers that re-import this module from calling main().
if __name__ == "__main__":
    main()