-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmeta_graph.py
156 lines (130 loc) · 5.05 KB
/
meta_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Convert a list of interactions into a meta interaction graph
import networkx as nt
from datetime import datetime
from collections import defaultdict
from memory_profiler import profile
from itertools import izip
import logging
# Module-wide logging setup: semicolon-separated records carrying a
# second-resolution timestamp, the level name and the message.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s;%(levelname)s;%(message)s",
)

# Dedicated logger for the graph conversion routines; DEBUG level so the
# periodic "building: ..." progress messages are not filtered out here.
logger = logging.getLogger("convert_to_meta_graph")
logger.setLevel(logging.DEBUG)
def convert_to_meta_graph(interaction_names, sources,
                          targets, datetimes,
                          preprune_secs=None):
    """
    Convert a list of interactions into a directed meta interaction graph.

    Every interaction becomes a node (possibly a singleton). An edge
    i1 -> i2 is added when i2 happens strictly later than i1 and either
    - broadcast pattern: i2 has the same source as i1, or
    - relay pattern: the source of i2 is one of the targets of i1.

    interaction_names: id of each interaction, used as the node
    sources: source node id for each interaction
    targets: list of target node ids for each interaction
    datetimes: happening time of the interactions
    preprune_secs: None (no pruning) or an int/float; when given, an edge
        is only added if the gap between the two interactions is at most
        this value. The gap is measured in seconds when `datetimes` holds
        datetime objects, otherwise it is the plain difference t2 - t1.

    All four fields shall be sorted from earliest to latest
    according to datetimes

    Returns the meta graph as a DiGraph (empty for empty input).

    Raises TypeError if preprune_secs is neither None nor int/float and
    AssertionError if the four fields differ in length.
    """
    if isinstance(preprune_secs, (int, float)):
        logger.info("preprune_by_secs {} enabled..".format(preprune_secs))
    elif preprune_secs is not None:
        raise TypeError(
            'preprune_secs should be int or float, is {}'.format(
                type(preprune_secs)
            )
        )

    assert len(interaction_names) == len(sources) == len(targets) == len(datetimes), \
        "{},{},{},{}".format(
            len(interaction_names), len(sources), len(targets), len(datetimes))

    g = nt.DiGraph()

    # source to interactions mapping
    # interpretation:
    # s is associated with the set of (interaction, time) pairs
    # that take it as source
    p2i = defaultdict(set)
    for i, s, time in izip(interaction_names, sources, datetimes):
        if (i, time) in p2i[s]:
            logger.warning("{} added already".format((i, time)))
        else:
            p2i[s].add((i, time))

    # Guard on the length so that empty input yields an empty graph
    # instead of an IndexError on datetimes[0].
    if len(datetimes) > 0 and isinstance(datetimes[0], datetime):
        def time_diff(t1, t2):
            return (t1 - t2).total_seconds()
    else:
        def time_diff(t1, t2):
            return t1 - t2

    def link_to_later(i1, time1, candidates):
        # connect i1 to every candidate that is strictly later and,
        # if pruning is enabled, close enough in time
        for i2, time2 in candidates:
            if time1 < time2 and (
                    preprune_secs is None or
                    time_diff(time2, time1) <= preprune_secs):
                g.add_edge(i1, i2)

    total = len(interaction_names)
    for row_n, (i1, s, ts, time1) in enumerate(izip(
            interaction_names, sources, targets, datetimes)):
        if row_n % 5000 == 0:
            logger.debug("building: {} / {}".format(row_n, total))

        # remove the entry of i1 in p2i; discard (not remove) because a
        # duplicated row was indexed only once and its entry is already
        # gone when the second copy is processed
        p2i[s].discard((i1, time1))

        # add node, can be singleton
        g.add_node(i1)

        # broadcast pattern
        link_to_later(i1, time1, p2i[s])

        # relay pattern; .get avoids inserting an empty set into p2i for
        # every target that never acts as a source
        for t in ts:
            link_to_later(i1, time1, p2i.get(t, ()))

    return g
def convert_to_meta_graph_undirected(node_names, participants, timestamps,
                                     preprune_secs=None):
    """
    Convert interactions without a source/target distinction into a
    meta interaction graph.

    Every interaction becomes a node (possibly a singleton). An edge
    i1 -> i2 is added when i2 happens strictly later than i1 and the two
    interactions share at least one participant. The result is still a
    DiGraph: edges point from the earlier to the later interaction.

    node_names: id of each interaction, used as the node
    participants: list of participant ids for each interaction
    timestamps: happening time of the interactions
    preprune_secs: None (no pruning) or an int/float; when given, an edge
        is only added if the gap between the two interactions is at most
        this value. The gap is measured in seconds when `timestamps` holds
        datetime objects, otherwise it is the plain difference t2 - t1.

    Returns the meta graph as a DiGraph (empty for empty input).

    Raises TypeError if preprune_secs is neither None nor int/float and
    AssertionError if the three fields differ in length.
    """
    if isinstance(preprune_secs, (int, float)):
        logger.info("preprune_by_secs {} enabled..".format(preprune_secs))
    elif preprune_secs is not None:
        raise TypeError(
            'preprune_secs should be int or float, is {}'.format(
                type(preprune_secs)
            )
        )

    assert len(node_names) == len(participants) == len(timestamps), \
        "{},{},{}".format(
            len(node_names), len(participants), len(timestamps))

    g = nt.DiGraph()

    # interpretation:
    # p is associated with the set of (interaction, time) pairs
    # it takes part in
    p2i = defaultdict(set)
    for i, ps, time in izip(node_names, participants, timestamps):
        for p in ps:
            if (i, time) in p2i[p]:
                logger.warning("{} added already".format((i, time)))
            else:
                p2i[p].add((i, time))

    # A timedelta cannot be meaningfully compared with a number, so
    # datetime gaps are converted to seconds first (same convention as
    # convert_to_meta_graph). The length guard keeps empty input working.
    if len(timestamps) > 0 and isinstance(timestamps[0], datetime):
        def time_diff(t1, t2):
            return (t1 - t2).total_seconds()
    else:
        def time_diff(t1, t2):
            return t1 - t2

    total = len(node_names)
    for row_n, (i1, ps, time1) in enumerate(izip(
            node_names, participants, timestamps)):
        if row_n % 5000 == 0:
            logger.debug("building: {} / {}".format(row_n, total))

        # add node, can be singleton
        g.add_node(i1)

        for p in ps:
            # i1 itself is still in p2i[p] here, but it is skipped by the
            # strict time1 < time2 test
            for i2, time2 in p2i[p]:
                if time1 < time2 and (
                        preprune_secs is None or
                        time_diff(time2, time1) <= preprune_secs):
                    g.add_edge(i1, i2)

            # remove the entry of i1 in p2i once its edges are built;
            # discard (not remove) because a duplicated row, or a
            # participant listed twice in ps, was indexed only once
            p2i[p].discard((i1, time1))

    return g
def convert_to_original_graph(mg):
    """
    Rebuild the participant-level graph from a meta graph.

    mg: meta graph; each node's attributes are read for the keys
        'sender', 'recipients', 'sender_id' and 'recipient_ids'

    Each 'sender' / 'recipients' record is added as a node keyed by its
    'id', with the record itself passed along as the node's attributes.
    Then, for every meta node, an edge is added from 'sender_id' to each
    entry of 'recipient_ids'.

    Returns the resulting DiGraph.
    """
    original = nt.DiGraph()

    # first pass: register every participant together with its record
    for meta_node in mg.nodes():
        attrs = mg.node[meta_node]
        sender_info = attrs['sender']
        original.add_node(sender_info['id'], sender_info)
        for recipient_info in attrs['recipients']:
            original.add_node(recipient_info['id'], recipient_info)

    # second pass: connect the sender to each of its recipients
    for meta_node in mg.nodes():
        attrs = mg.node[meta_node]
        sender_id = attrs['sender_id']
        for recipient_id in attrs['recipient_ids']:
            original.add_edge(sender_id, recipient_id)

    return original