-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkaggle_geomean.py
executable file
·31 lines (28 loc) · 1.01 KB
/
kaggle_geomean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from __future__ import division
from collections import defaultdict
from glob import glob
import sys
import math
# Command-line arguments, read as soon as the module is executed:
#   sys.argv[1] -> glob pattern; kaggle_bag() passes it to glob() to find the input files
#   sys.argv[2] -> path that kaggle_bag() opens for writing the blended output
# A missing argument raises IndexError on the corresponding line.
glob_files = sys.argv[1]
loc_outfile = sys.argv[2]
def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
    """Blend prediction files by the geometric mean of their scores.

    Every file matched by ``glob_files`` is read as text. Its first line is
    treated as a header; each following non-blank line is split on commas,
    the first field being the row id and the second the score (parsed with
    ``float``). Data rows are sorted lexicographically within each file and
    paired across files by ``(position after sorting, id)``. For each pair
    the scores are multiplied together and the N-th root of the product is
    written, where N is the number of non-empty files that were parsed.

    The output consists of the header line of the first non-empty file
    followed by one ``id,score`` line per pair (score formatted with ``%f``).

    Args:
        glob_files: glob pattern selecting the input files.
        loc_outfile: path of the output file; it is created or truncated.
        method: only ``"average"`` is supported.
        weights: accepted for interface compatibility; not used.

    Raises:
        ValueError: if ``method`` is not ``"average"`` (raised before the
            output file is touched), if a score is not a valid float, or if
            a negative product makes the root undefined.
        IndexError: if a non-blank data row has no second column.
    """
    if method != "average":
        raise ValueError("unsupported method: %r" % (method,))

    # Running product per (row position, id). A plain dict with an explicit
    # 1.0 start value is used on purpose: treating a stored 0 as "not seen
    # yet" would silently discard a genuine score of 0.0.
    products = {}
    n_files = 0

    # Text mode: the rows written below are str, which a binary-mode file
    # rejects on Python 3.
    with open(loc_outfile, "w") as outfile:
        for glob_file in glob(glob_files):
            print("parsing: %s" % glob_file)
            with open(glob_file) as infile:
                lines = infile.readlines()
            if not lines:
                # An empty file has neither header nor scores; skip it so it
                # does not inflate the root taken below.
                continue
            if n_files == 0:
                # The header is copied from the first file that has one.
                outfile.write(lines[0])
            n_files += 1

            # Sort the data rows (everything after the header) so files that
            # list the same ids in a different order still line up; blank
            # lines carry no score and are dropped.
            data_rows = sorted(ln for ln in lines[1:] if ln.strip())
            for position, line in enumerate(data_rows, 1):
                row = line.strip().split(",")
                key = (position, row[0])
                products[key] = products.get(key, 1.0) * float(row[1])

        if n_files:
            # Float literal keeps this a true division even without
            # ``from __future__ import division``.
            exponent = 1.0 / n_files
            for key in sorted(products):
                geomean = math.pow(products[key], exponent)
                outfile.write("%s,%f\n" % (key[1], geomean))

    print("wrote to %s" % loc_outfile)
# Script entry: blend the files selected on the command line into the output path.
kaggle_bag(glob_files=glob_files, loc_outfile=loc_outfile)