-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess-talks.py
101 lines (74 loc) · 2.43 KB
/
process-talks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
GridPP - Processing Talks
See the README.md file and the GitHub wiki for more information.
http://www.gridpp.ac.uk/talks
"""
# Import the code needed to manage files.
import os, glob
#...for parsing the arguments.
import argparse
#...for the logging.
import logging as lg
#...for the Talk wrapper class.
from records.talks import Talk, TALK_CATEGORIES
if __name__ == "__main__":
    # Script entry point: read a TSV file of talks, group the talks by
    # category, and write the combined web entries to "talks.html" in the
    # output directory. A log file is written alongside it.

    print("*")
    print("*========================*")
    print("* GridPP - Process Talks *")
    print("*========================*")

    # Get the input and output paths from the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument("inputPath", help="Path to the input dataset.")
    parser.add_argument("outputPath", help="The path for the output files.")
    parser.add_argument("-v", "--verbose", help="Increase output verbosity", action="store_true")
    args = parser.parse_args()

    ## The path to the data file.
    datapath = args.inputPath

    ## The output path.
    outputpath = args.outputPath

    ## The logging level (DEBUG when --verbose is given, INFO otherwise).
    level = lg.DEBUG if args.verbose else lg.INFO

    # Configure the logging; filemode 'w' overwrites any previous log file.
    lg.basicConfig(filename=os.path.join(outputpath, 'log_process-talks.log'), filemode='w', level=level)

    print("*")
    print("* Input path : '%s'" % (datapath))
    print("* Output path : '%s'" % (outputpath))
    print("*")

    # Read the TSV file. The context manager closes the handle even if
    # reading fails.
    with open(datapath, "r") as tf:
        lines = tf.readlines()

    ## Dictionary of talks, mapping each Talk to its category.
    talks = {}

    # The first line is skipped (presumably the TSV header row -- confirm
    # against the input format).
    for line in lines[1:]:

        ## The current talk.
        talk = Talk(line)

        # Populate the dictionary.
        talks[talk] = talk.getCategory()

    ## The pieces of the HTML string of talks, joined once at the end.
    parts = []

    # Loop over the categories. Only the values of TALK_CATEGORIES are used,
    # and .values()/.items() work on both Python 2 and Python 3 (the former
    # .iteritems() exists on Python 2 only).
    for category in TALK_CATEGORIES.values():

        parts.append("\n\n\n%s\n\n\n" % (category))

        ## The talks in this category, sorted in descending order. The
        ## ordering is whatever the Talk comparison defines (by date,
        ## according to the original comment).
        sortedtalks = sorted(
            (talk for talk, cat in talks.items() if cat == category),
            reverse=True)

        for talk in sortedtalks:
            parts.append(talk.getWebEntry())

        # NOTE(review): emitted once per category, after its talks. The
        # nesting was reconstructed from a source listing that had lost its
        # indentation -- confirm against the original layout.
        parts.append("\n")

    ## The HTML string of talks.
    hs = "".join(parts)

    # Write the HTML file.
    with open(os.path.join(outputpath, "talks.html"), "w") as hf:
        hf.write(hs)