Skip to content

Commit

Permalink
Get correct data if ODS spreadsheet has empty cells
Browse files Browse the repository at this point in the history
Fix #320
  • Loading branch information
turicas committed Mar 27, 2019
1 parent 9995c3c commit c569f94
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 26 deletions.
2 changes: 2 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
- [#314](https://github.com/turicas/rows/issues/314) rows pgimport fails if
using --schema
- [#309](https://github.com/turicas/rows/issues/309) Fix file-magic detection
- [#320](https://github.com/turicas/rows/issues/320) Get correct data if ODS
spreadsheet has empty cells

## Version `0.4.1` (bugfix release)

Expand Down
59 changes: 33 additions & 26 deletions rows/plugins/ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,41 +61,48 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):
table_rows = []
for row_obj in table_rows_obj:
row = []
for cell in xpath(row_obj, "//table:table-cell", namespaces):
cells = reversed(xpath(row_obj, "//table:table-cell", namespaces))
row_started = False
for cell in cells:
children = cell.getchildren()
if not children:
continue

# TODO: evaluate 'boolean' and 'time' types
value_type = attrib(cell, namespaces["office"], "value-type")
if value_type == "date":
cell_value = attrib(cell, namespaces["office"], "date-value")
elif value_type == "float":
cell_value = attrib(cell, namespaces["office"], "value")
elif value_type == "percentage":
cell_value = attrib(cell, namespaces["office"], "value")
cell_value = Decimal(cell_value)
cell_value = "{:%}".format(cell_value)
elif value_type == "string":
try:
# get computed string (from formula, for example)
cell_value = attrib(cell, namespaces["office"], "string-value")
except KeyError:
# computed string not present => get from <p>...</p>
cell_value = None
# TODO: check repeat
else:
# TODO: evaluate 'boolean' and 'time' types
value_type = attrib(cell, namespaces["office"], "value-type")
if value_type == "date":
cell_value = attrib(cell, namespaces["office"], "date-value")
elif value_type == "float":
cell_value = attrib(cell, namespaces["office"], "value")
elif value_type == "percentage":
cell_value = attrib(cell, namespaces["office"], "value")
cell_value = Decimal(cell_value)
cell_value = "{:%}".format(cell_value)
elif value_type == "string":
try:
# get computed string (from formula, for example)
cell_value = attrib(cell, namespaces["office"], "string-value")
except KeyError:
# computed string not present => get from <p>...</p>
cell_value = children[0].text
else: # value_type == some type we don't know
cell_value = children[0].text
else: # value_type == some type we don't know
cell_value = children[0].text

try:
repeat = attrib(cell, namespaces["table"], "number-columns-repeated")
except KeyError:
row.append(cell_value)
row_started = True
else:
for _ in range(int(repeat)):
row.append(cell_value)

if row:
table_rows.append(row)
cell_data = [cell_value for _ in range(int(repeat))]
if set(cell_data) != set([None]) or row_started:
# This check will remove empty cells from the end
row.extend(cell_data)
row_started = True

if row and set(row) != set([None]):
table_rows.append(list(reversed(row)))

max_length = max(len(row) for row in table_rows)
full_rows = complete_with_None(table_rows, max_length)
Expand Down
Binary file modified tests/data/all-field-types.ods
Binary file not shown.
Binary file added tests/data/empty-cells.ods
Binary file not shown.
14 changes: 14 additions & 0 deletions tests/tests_plugin_ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,17 @@ def test_issue_290_one_hundred_read_as_1(self):
assert result[2][0] == Decimal("0.01")
assert result[3][0] == Decimal("0.1")
assert result[4][0] == Decimal("1")

def test_issue_320_empty_cells(self):
    """Regression test for issue #320: empty ODS cells keep their position.

    Imports ``tests/data/empty-cells.ods`` and checks the first five rows
    over fields ``f1``..``f5``. The expected rows contain ``None`` at the
    start, in the middle and at the end of a row, so an empty cell must be
    reported as ``None`` in its own column instead of being dropped (which
    would shift the remaining values to the left).
    """
    result = rows.import_from_ods("tests/data/empty-cells.ods")
    header = "f1 f2 f3 f4 f5".split()
    # Read each row through attribute access so the comparison is made
    # against plain lists, independent of the row object's own type.
    data = [
        [getattr(result[index], field) for field in header]
        for index in range(5)
    ]
    assert data[0] == ["r1f1", "r1f2", None, "r1f4", "r1f5"]
    assert data[1] == ["r2f1", None, "r2f3", "r2f4", "r2f5"]
    assert data[2] == [None, "r3f2", "r3f3", "r3f4", "r3f5"]
    assert data[3] == ["r4f1", "r4f2", "r4f3", "r4f4", None]
    assert data[4] == [None, None, "r5f3", "r5f4", "r5f5"]

0 comments on commit c569f94

Please sign in to comment.