Get correct data if ODS spreadsheet has empty cells

Fix #320
turicas · Mar 27, 2019 · c569f94
1 parent 9995c3c
commit c569f94
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 26 deletions.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -23,6 +23,8 @@
 - [#314](https://github.com/turicas/rows/issues/314) rows pgimport fails if
   using --schema
 - [#309](https://github.com/turicas/rows/issues/309) Fix file-magic detection
+- [#320](https://github.com/turicas/rows/issues/320) Get correct data if ODS
+  spreadsheet has empty cells
 
 ## Version `0.4.1` (bugfix release)
 

diff --git a/rows/plugins/ods.py b/rows/plugins/ods.py
@@ -61,41 +61,48 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):
     table_rows = []
     for row_obj in table_rows_obj:
         row = []
-        for cell in xpath(row_obj, "//table:table-cell", namespaces):
+        cells = reversed(xpath(row_obj, "//table:table-cell", namespaces))
+        row_started = False
+        for cell in cells:
             children = cell.getchildren()
             if not children:
-                continue
-
-            # TODO: evalute 'boolean' and 'time' types
-            value_type = attrib(cell, namespaces["office"], "value-type")
-            if value_type == "date":
-                cell_value = attrib(cell, namespaces["office"], "date-value")
-            elif value_type == "float":
-                cell_value = attrib(cell, namespaces["office"], "value")
-            elif value_type == "percentage":
-                cell_value = attrib(cell, namespaces["office"], "value")
-                cell_value = Decimal(cell_value)
-                cell_value = "{:%}".format(cell_value)
-            elif value_type == "string":
-                try:
-                    # get computed string (from formula, for example)
-                    cell_value = attrib(cell, namespaces["office"], "string-value")
-                except KeyError:
-                    # computed string not present => get from <p>...</p>
+                cell_value = None
+                # TODO: check repeat
+            else:
+                # TODO: evaluate 'boolean' and 'time' types
+                value_type = attrib(cell, namespaces["office"], "value-type")
+                if value_type == "date":
+                    cell_value = attrib(cell, namespaces["office"], "date-value")
+                elif value_type == "float":
+                    cell_value = attrib(cell, namespaces["office"], "value")
+                elif value_type == "percentage":
+                    cell_value = attrib(cell, namespaces["office"], "value")
+                    cell_value = Decimal(cell_value)
+                    cell_value = "{:%}".format(cell_value)
+                elif value_type == "string":
+                    try:
+                        # get computed string (from formula, for example)
+                        cell_value = attrib(cell, namespaces["office"], "string-value")
+                    except KeyError:
+                        # computed string not present => get from <p>...</p>
+                        cell_value = children[0].text
+                else:  # value_type == some type we don't know
                     cell_value = children[0].text
-            else:  # value_type == some type we don't know
-                cell_value = children[0].text
 
             try:
                 repeat = attrib(cell, namespaces["table"], "number-columns-repeated")
             except KeyError:
                 row.append(cell_value)
+                row_started = True
             else:
-                for _ in range(int(repeat)):
-                    row.append(cell_value)
-
-        if row:
-            table_rows.append(row)
+                cell_data = [cell_value for _ in range(int(repeat))]
+                if set(cell_data) != set([None]) or row_started:
+                    # This check will remove empty cells from the end
+                    row.extend(cell_data)
+                    row_started = True
+
+        if row and set(row) != set([None]):
+            table_rows.append(list(reversed(row)))
 
     max_length = max(len(row) for row in table_rows)
     full_rows = complete_with_None(table_rows, max_length)

diff --git a/tests/data/all-field-types.ods b/tests/data/all-field-types.ods
diff --git a/tests/data/empty-cells.ods b/tests/data/empty-cells.ods
diff --git a/tests/tests_plugin_ods.py b/tests/tests_plugin_ods.py
@@ -72,3 +72,17 @@ def test_issue_290_one_hundred_read_as_1(self):
         assert result[2][0] == Decimal("0.01")
         assert result[3][0] == Decimal("0.1")
         assert result[4][0] == Decimal("1")
+
+    def test_issue_320_empty_cells(self):
+        result = rows.import_from_ods("tests/data/empty-cells.ods")
+        header = "f1 f2 f3 f4 f5".split()
+        print(result[0])
+        data = [
+            [getattr(result[index], field) for field in header]
+            for index in range(5)
+        ]
+        assert data[0] == ["r1f1", "r1f2", None, "r1f4", "r1f5"]
+        assert data[1] == ["r2f1", None, "r2f3", "r2f4", "r2f5"]
+        assert data[2] == [None, "r3f2", "r3f3", "r3f4", "r3f5"]
+        assert data[3] == ["r4f1", "r4f2", "r4f3", "r4f4", None]
+        assert data[4] == [None, None, "r5f3", "r5f4", "r5f5"]