-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathxls_reader.py
61 lines (48 loc) · 1.88 KB
/
xls_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python3.10
'''Module that reads an xls spreadsheet and can produce json data from it'''
import datetime
import xlrd
def get_column_names(sheet):
'''Takes a single worksheet, returns the strings in the top row of each column'''
row_size = sheet.row_len(0)
col_values = sheet.row_values(0, 0, row_size )
column_names = []
for value in col_values:
column_names.append(value)
return column_names
def get_row_data(row, column_names):
'''takes a single row of a worksheet and an array of rows,
returns an object with column_name:rowvalue
'''
row_data = {}
counter = 0
for cell in row:
# check if it is of date type print in iso format
if cell.ctype==xlrd.XL_CELL_DATE:
row_data[column_names[counter].lower().replace(' ', '_')] = datetime.datetime(*xlrd.xldate_as_tuple(cell.value,0)).isoformat()
else:
row_data[column_names[counter].lower().replace(' ', '_')] = cell.value
counter +=1
return row_data
def get_sheet_data(sheet, column_names):
'''Takes a single worksheet, returns an object with row data'''
num_of_rows = sheet.nrows
sheet_data = []
for idx in range(1, num_of_rows):
row = sheet.row(idx)
row_data = get_row_data(row, column_names)
sheet_data.append(row_data)
return sheet_data
def get_workbook_data(workbook):
'''Takes a workbook and returns all worksheet data'''
nsheets = workbook.nsheets
workbook_data = {}
for idx in range(0, nsheets):
worksheet = workbook.sheet_by_index(idx)
column_names = get_column_names(worksheet)
sheetdata = get_sheet_data(worksheet, column_names)
workbook_data[worksheet.name.lower().replace(' ', '_')] = sheetdata
return workbook_data
def get_workbook(filename):
'''opens a workbook for reading'''
return xlrd.open_workbook(filename)