# retrieveuniquefields.py
# Collects the distinct field names from the settlement HTML tables and writes them to a CSV file.
import os
import csv
from bs4 import BeautifulSoup
from pathlib import Path
# Variables
# Directory that is searched recursively (glob '**/*.html') for the HTML table files to parse.
source = "Data/settlement/tables"
def retrieveFields(path, table_index=5):
    """Return the field entries listed in the data table of one HTML file.

    The file is parsed with BeautifulSoup's ``html.parser``. The table at
    position ``table_index`` among all ``<table>`` elements is used as the
    data table, and for each ``<tr>`` inside its ``<tbody>`` the ``.string``
    of the row's second child node is collected.

    Args:
        path: Path of the HTML file to read.
        table_index: Zero-based position of the data table among all tables
            in the document. Defaults to 5, the value that used to be
            hard-coded here.

    Returns:
        A list with one entry per table row, in document order. An entry may
        be ``None`` when the selected node has no single string.

    Raises:
        IndexError: If the document has no table at ``table_index`` or a row
            has fewer than two child nodes.
        AttributeError: If the selected table has no ``<tbody>`` element.
    """
    with open(path) as inf:
        soup = BeautifulSoup(inf.read(), 'html.parser')

    data_table = soup.find_all('table')[table_index]
    rows = data_table.find('tbody').find_all('tr')

    # NOTE(review): contents[1] is the second child node of the row;
    # presumably index 0 is a whitespace text node or a leading cell --
    # confirm against the actual HTML layout.
    return [row.contents[1].string for row in rows]
# List Implementation
def retrieveUniqueFields(source_dir=None):
    """Return the distinct field entries of all HTML files, in first-seen order.

    Every ``*.html`` file below the search directory (recursively) is passed
    to ``retrieveFields`` and the results are de-duplicated while keeping the
    order in which each entry was first encountered.

    Args:
        source_dir: Directory to search. When ``None`` (the default) the
            module-level ``source`` directory is used.

    Returns:
        A list of unique field entries; empty when no HTML file is found.
    """
    if source_dir is None:
        source_dir = source

    # A set gives O(1) membership tests; the list preserves first-seen order.
    seen = set()
    unique_fields = []
    for path in Path(source_dir).glob('**/*.html'):
        for field in retrieveFields(str(path)):
            if field not in seen:
                seen.add(field)
                unique_fields.append(field)
    return unique_fields
# Set Implementation
def retrieveUniqueFieldsSet(source_dir=None):
    """Return the distinct field entries of all HTML files as a set.

    Every ``*.html`` file below the search directory (recursively) is passed
    to ``retrieveFields`` and the results are merged into one set, so the
    result carries no ordering.

    Args:
        source_dir: Directory to search. When ``None`` (the default) the
            module-level ``source`` directory is used.

    Returns:
        A set of unique field entries; empty when no HTML file is found.
    """
    if source_dir is None:
        source_dir = source

    # The accumulator is created up front; previously it was never
    # initialised, so every call failed with NameError.
    all_fields_set = set()
    for path in Path(source_dir).glob('**/*.html'):
        all_fields_set.update(retrieveFields(str(path)))
    return all_fields_set
def writeUniqueFieldsToCSV(unique_fields,
                           output_path="Result/settlement_csv/unique_fields.csv"):
    """Write the given fields to a pipe-delimited CSV file.

    The file gets a ``Field|Description`` header row followed by one row per
    field with an empty description column. A confirmation message is printed
    once the file has been written.

    Args:
        unique_fields: Iterable of field entries to write, one per row.
        output_path: Destination file. Defaults to the path that used to be
            hard-coded here; the containing directory must already exist.
    """
    # NOTE(review): on Python 3 the csv documentation recommends opening the
    # file with newline=''; it is left out so this call stays valid on
    # Python 2 as well -- add it once the script is Python 3 only.
    with open(output_path, "w") as csv_file:
        writer = csv.writer(csv_file, delimiter="|")
        writer.writerow(["Field", "Description"])
        writer.writerows([field, ""] for field in unique_fields)
    # Parenthesised form works as a statement on Python 2 and as a function
    # call on Python 3; the bare `print "..."` was a SyntaxError on Python 3.
    print("Done writing unique fields in CSV")
# Script entry: collect the unique fields with the list implementation and
# write them to the CSV file.
# NOTE(review): this statement sits at module level, so it also runs when the
# file is imported, not only when it is executed directly.
writeUniqueFieldsToCSV(retrieveUniqueFields())