-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtwitter_friend_graph.py
59 lines (51 loc) · 2.21 KB
/
twitter_friend_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from igraph import *
from database import *
from twitter_graph import *
from optparse import OptionParser
def _build_option_parser():
    """Return the OptionParser that defines this script's command-line flags."""
    parser = OptionParser()
    parser.add_option(
        "-g", action="store_true", dest="graphs",
        help="Output graphs for communities.")
    parser.add_option(
        "-s", action="store_true", dest="subcomms",
        help="Do sub-communities and output graphs. This sets users 'group' column to a subcomm number.")
    parser.add_option(
        "-m", "--max_users", type="int", default=30000,
        help="Maximum number of users to attempt to cluster/graph.")
    parser.add_option(
        "-c", action="store_true", dest="color_by_group",
        help="Color users by group.")
    return parser


def _add_users(twit_graph, max_users, color_by_group):
    """Fetch at most max_users explored users and add each one to twit_graph."""
    # NOTE(review): `database` is presumably bound by `from database import *`
    # at the top of the file -- confirm that module exports that name.
    users = database.findAllExploredUsers(limit=max_users)
    if color_by_group:
        add_user = twit_graph.addUserWithGroupColor
    else:
        add_user = twit_graph.addUser
    # Plain loop: the calls are made for their side effect only, so there is
    # no reason to collect their return values in a list.
    for user in users:
        add_user(user)


def _add_edges(twit_graph, batch_size=100000):
    """Page through the stored edges and add every one of them to twit_graph."""
    page = 0
    while True:
        # Edges are fetched one page at a time rather than in a single query.
        edges = database.findAllEdgesBatched(batch_size, page)
        fetched = len(edges)
        for edge in edges:
            twit_graph.addEdge(edge)
        print("Added %s edges" % fetched)
        # Drop the finished batch before the next page is requested so two
        # full batches are never held in memory at once.
        del edges
        page += 1
        # A page that is not completely full is the last page.
        if fetched != batch_size:
            break


def main():
    """Build the Twitter friend graph, cluster it, and optionally draw it.

    Command-line flags (see _build_option_parser):
      -g               draw the community graphs and the world graph
      -s               also find sub-communities (drawn as they are found)
      -m/--max_users   cap on the number of users loaded (default 30000)
      -c               add users through addUserWithGroupColor, not addUser

    Every print call passes exactly one argument, so the output is the same
    whether `print` is the Python 2 statement or the Python 3 function.
    """
    options, _args = _build_option_parser().parse_args()

    twit_graph = TwitterGraph()

    print("Adding users to igraph.Graph")
    _add_users(twit_graph, options.max_users, options.color_by_group)

    print("Adding edges to igraph.Graph")
    _add_edges(twit_graph)
    print("Database objects retreived")

    # We (may) need to filter out super-users that have too many friends.
    # They don't do us any good at finding relationships between real people.
    # They may provide insight into topics, but I am pretty sure they screw
    # up clustering.

    print("\n".join(["", "=== Generating Graph ===", ""]))
    twit_graph.generateGraph()

    print("\n".join(["", "=== Finding Communities ===", ""]))
    twit_graph.findCommunities()
    if options.subcomms:
        twit_graph.findSubcommunities(draw=True)

    if options.graphs:
        print("\n".join(["", "=== Drawing Communities ===", ""]))
        twit_graph.drawCommunities("community", height=1500, width=1000)
        print("\n".join(["", "=== Drawing World Graph ===", ""]))
        twit_graph.drawBigGraph("world", height=3000, width=2000)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()