forked from mikekatz04/LATW
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathform_groups.py
82 lines (63 loc) · 2.54 KB
/
form_groups.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import argparse
import numpy as np
import pandas as pd
def form_groups(
    fp_registration: str,
    num_per_group: int = 5,
    num_sessions: int = 6,
    fp_output: str = "groups_for_LATW.csv",
) -> None:
    """Split group-work participants into experience-balanced groups.

    The registration CSV is read, registrants who answered "yes" (any case)
    in the "Group work" column are kept, and each one gets a combined
    experience score (LISA data analysis + Python).  For every session the
    participants are ranked by that score and paired lowest-with-highest;
    the pairs are then dealt round-robin to the groups.  Ties in the score
    are broken randomly, so sessions generally differ from one another.

    The result is written as a CSV sorted by name with the columns
    ``Name``, ``exp`` and one ``group_<session>`` column per session
    (group numbers start at 1).

    Args:
        fp_registration: Path to the registration CSV export.
        num_per_group: Target group size.  It only sets the number of
            groups (``participants // num_per_group + 1``); actual group
            sizes are approximate because people are placed in pairs.
        num_sessions: Number of independent group assignments to produce.
        fp_output: Path of the CSV file to write.

    Raises:
        ValueError: If ``num_per_group`` or ``num_sessions`` is smaller
            than 1, or if nobody opted into group work.
        KeyError: If the registration file lacks an expected column.
    """
    if num_per_group < 1:
        raise ValueError("num_per_group must be at least 1")
    if num_sessions < 1:
        raise ValueError("num_sessions must be at least 1")

    participants = _collect_participants(pd.read_csv(fp_registration))
    if participants.empty:
        raise ValueError(
            f"no registrant in {fp_registration!r} opted into group work"
        )

    # mergesort is stable, so equal names keep a reproducible order.
    output = participants.sort_values("Name", kind="mergesort")
    for session_i in range(num_sessions):
        # Assigning a Series aligns on the index, so every participant gets
        # their own group number regardless of the row order in `output`.
        output[f"group_{session_i}"] = _assign_session_groups(
            participants, num_per_group
        )

    output.to_csv(fp_output)


def _parse_experience(answer, opt_out: str) -> int:
    """Turn one experience answer into an integer score (0 if not given)."""
    # A blank cell is read by pandas as NaN; treat it like the opt-out answer.
    if pd.isna(answer) or answer == opt_out:
        return 0
    return int(answer)


def _collect_participants(registration_info: pd.DataFrame) -> pd.DataFrame:
    """Return the ``Name``/``exp`` table of registrants who want group work."""
    # The column titles are copied verbatim from the registration form
    # (including the "Rate you experience" typo) and must match the file.
    lisa_exp = registration_info["Rate your experience with LISA Data Analysis."]
    python_exp = registration_info["Rate you experience with Python."]
    group_work = registration_info["Group work"]
    names = registration_info["Name"]

    kept_names = []
    kept_exp = []
    for le, pe, gw, name in zip(lisa_exp, python_exp, group_work, names):
        # Anything other than an explicit "yes" (including a blank cell,
        # which is not a string) means: not taking part in group work.
        if not (isinstance(gw, str) and gw.strip().lower() == "yes"):
            continue
        kept_names.append(name)
        kept_exp.append(
            _parse_experience(le, "Prefer not to say")
            + _parse_experience(pe, "Prefer not to answer.")
        )

    return pd.DataFrame({"Name": kept_names, "exp": kept_exp})


def _assign_session_groups(
    participants: pd.DataFrame, num_per_group: int
) -> pd.Series:
    """Return one session's 1-based group number for every participant."""
    num_people = participants.shape[0]

    # Shuffle first, then sort stably: people with the same score end up in
    # a random relative order, which is what varies the groups per session.
    shuffled = participants.iloc[np.random.permutation(num_people)]
    ranked = shuffled.sort_values("exp", kind="mergesort")

    total_groups = num_people // num_per_group + 1
    num_pairs = num_people // 2

    group_numbers = np.empty(num_people, dtype=int)
    for pair_i in range(num_pairs):
        # Pair the pair_i-th least experienced with the pair_i-th most
        # experienced participant and deal the pair to the next group.
        group_number = pair_i % total_groups + 1
        group_numbers[pair_i] = group_number
        group_numbers[num_people - 1 - pair_i] = group_number

    if num_people % 2 == 1:
        # Odd head-count: the median participant has no partner and simply
        # takes the next group in the rotation.
        group_numbers[num_pairs] = num_pairs % total_groups + 1

    return pd.Series(group_numbers, index=ranked.index)
if __name__ == "__main__":
    # Command-line entry point: takes the registration CSV path as its only
    # (positional) argument and runs the grouping with the default settings.
    cli = argparse.ArgumentParser(prog="form groups for LATW")
    cli.add_argument("filename")  # path to the registration CSV
    form_groups(cli.parse_args().filename)