-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremondini_years.py
153 lines (126 loc) · 4.68 KB
/
remondini_years.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Load the publication records. The statements further down read the
# 'Authors' and 'Date' fields of every row.
df = pd.read_json("./data/data.json")
# NOTE: the triple-quoted string below is a bare expression statement, so
# none of the code inside it runs. It holds a disabled exploration that
# collects the distinct author names, groups them by their first
# whitespace-separated token, and prints the groups whose size is not 1
# together with how many such groups exist.
"""
author_indices = {}
auths = []
# Iterate through the DataFrame
for index, row in df.iterrows():
authors = row['Authors']
for author in authors:
# Remove leading/trailing spaces
auths.append(author)
auths = set(auths)
auths = list(auths)
res = {}
for auth in auths:
if len(auth.split()) != 0:
first_name = auth.split()[0]
if first_name not in res.keys():
res[first_name] = [auth]
else:
res[first_name].append(auth)
f_res = [(key, value) for key, value in res.items() if len(value) != 1]
print(f_res)
print(len(f_res))
"""
# Collect, for every author, the 'Date' value of each record they appear on.
author_years_dict = {}
for _, row in df.iterrows():
    year = row['Date']
    for author in row['Authors']:
        # setdefault creates the list on first sight of an author.
        author_years_dict.setdefault(author, []).append(year)

# Span between each author's largest and smallest 'Date' value.
# NOTE(review): assumes 'Date' values support max/min and subtraction
# (e.g. numeric years) -- confirm against data.json.
auth_max_min_y = {
    author: max(years) - min(years)
    for author, years in author_years_dict.items()
}
# Re-order the mapping from the widest span to the narrowest.
auth_max_min_y = dict(
    sorted(auth_max_min_y.items(), key=lambda item: item[1], reverse=True)
)
print(auth_max_min_y)
print(len(auth_max_min_y))

# Spot-check two specific author spellings. .get() is used so that a dataset
# lacking either spelling prints None instead of aborting the whole script
# with a KeyError.
for spot_check in ('Grugni G', 'Grugni Graziano'):
    print(auth_max_min_y.get(spot_check))
def _rank_by_author(ranking):
    """Map the first field of each two-field entry to its 1-based position."""
    ranks = {}
    for rank, (author, _) in enumerate(ranking, start=1):
        ranks[author] = rank
    return ranks


# Saved centrality results, one array per measure. Each entry must unpack
# into two fields; the first one is used as the author key.
# NOTE(review): presumably each array is already sorted by score, since the
# plots label the x-axis "ordered by centrality measure" -- verify against
# whatever produced these files.
eig = np.load("./results/eigenvector_centrality.npy")
deg = np.load("./results/degree_centrality.npy")
bet = np.load("./results/betweeness_centrality.npy")
clo = np.load("./results/closeness_centrality.npy")

# author -> 1-based position, for every measure.
p_list = [_rank_by_author(ranking) for ranking in (eig, deg, bet, clo)]
eig_positions, deg_positions, bet_positions, clo_positions = p_list

# For each measure, pair every author (in ranking order) with their time span.
r_list = []
for positions in p_list:
    print(positions.keys())
    print('\n')
    r_list.append([[name, auth_max_min_y[name]] for name in positions])

borda = np.load("./results/borda.npy")
borda_positions = _rank_by_author(borda)
# Marker color used for each centrality measure's scatter panel,
# keyed by the measure's short name.
centrality_colors = {
    'eig': 'red',     # eigenvector centrality
    'deg': 'green',   # degree centrality
    'bet': 'blue',    # betweenness centrality
    'clo': 'orange',  # closeness centrality
}
def plot_scatter(ax, data, title, color):
    """Draw one small-marker scatter panel on ``ax``.

    Args:
        ax: Axes-like object that receives the drawing calls.
        data: Sequence of ``[label, value]`` pairs; labels are passed as the
            x values and values as the y values, in the given order.
        title: Title set on the panel.
        color: Marker color forwarded to ``ax.scatter``.
    """
    labels = [pair[0] for pair in data]
    spans = [pair[1] for pair in data]

    # s=5 keeps the markers small.
    ax.scatter(labels, spans, color=color, s=5)

    ax.set_title(title)
    ax.set_xlabel("Authors ordered by centrality measure")
    ax.set_ylabel("Time span between most recent and oldest publication")
    # Hide the x ticks and their labels; the y ticks are kept.
    ax.set_xticks([])
# One figure holding a 2x2 grid of panels, one per centrality measure.
fig, axs = plt.subplots(2, 2, figsize=(10, 8))

# (panel title, color key) listed in the same order as the series in r_list.
panels = [
    ('Eigenvector Centrality', 'eig'),
    ('Degree Centrality', 'deg'),
    ('Betweenness Centrality', 'bet'),
    ('Closeness Centrality', 'clo'),
]
for idx, (panel_title, color_key) in enumerate(panels):
    # Fill the grid row by row: 0 -> (0, 0), 1 -> (0, 1), 2 -> (1, 0), 3 -> (1, 1).
    grid_row, grid_col = divmod(idx, 2)
    plot_scatter(
        axs[grid_row, grid_col],
        r_list[idx],
        panel_title,
        centrality_colors[color_key],
    )

plt.tight_layout()
plt.savefig("scatter_timespan.png")
plt.show()
# NOTE: the triple-quoted string below is a bare expression statement, so
# none of the code inside it runs. It holds a disabled variant that loads the
# Borda ranking, appends its [author, time span] pairs to r_list as a fifth
# series, and draws that series on a single axes. Despite its name, the
# plot_histogram helper inside it calls ax.plot, i.e. it draws a line plot.
"""
# Mapping colors to centrality measures
centrality_colors = {
'eig': 'red',
'deg': 'green',
'bet': 'blue',
'clo': 'orange',
}
borda = np.load("./results/borda.npy")
borda_positions = {author: position for position, (author, _) in enumerate(borda, start=1)}
print(borda)
# Add Borda Centrality positions to the positions list
borda_positions_list = []
for auth in borda_positions.keys():
borda_positions_list.append([auth, auth_max_min_y[auth]])
r_list.append(borda_positions_list)
# Function to plot histogram for a centrality measure
def plot_histogram(ax, data, title, color):
print(len(data))
values = [item[1] for item in data]
names = [item[0] for item in data]
ax.plot(names, values) #color=color, edgecolor=color)
ax.set_title(title)
ax.set_xlabel("Authors")
ax.set_ylabel("Time Span (Years)")
ax.set_xticks([]) # Remove x-axis ticks and labels
#ax.set_yticks([]) # Remove y-axis ticks and labels
# Create a 2 by 2 grid for subplots
fig, ax = plt.subplots(figsize=(10, 8))
# Plot histogram for Borda Centrality
plot_histogram(ax, r_list[4], 'Borda Score', 'purple')
plt.tight_layout()
#plt.savefig("ultima_figura.png")
plt.show()
"""