Automattic · joaopamaral · Feb 5, 2025 · Feb 5, 2025
diff --git a/meltano.yml b/meltano.yml
@@ -25,5 +25,7 @@ plugins:
     - name: extra_fields
     - name: extra_fields_types
     - name: partition_cols
+    - name: max_flatten_level
+      kind: integer
     config:
       start_date: '2010-01-01T00:00:00Z'
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "target-parquet"
-version = "1.0.3"
+version = "1.1.0"
 description = "`target-parquet` is a Singer target for parquet, built with the Meltano Singer SDK."
 readme = "README.md"
 authors = ["Joao Amaral <joao.amaral@automattic.com>"]

diff --git a/target_parquet/sinks.py b/target_parquet/sinks.py
@@ -19,15 +19,14 @@
 class ParquetSink(BatchSink):
     """parquet target sink class."""
 
-    flatten_max_level = 100  # Max level of nesting to flatten
-
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.pyarrow_df = None
         self.destination_path = os.path.join(
             self.config.get("destination_path", "output"), self.stream_name
         )
         self.files_saved = 0
+        self.flatten_max_level = self.config.get("max_flatten_level", 100)
 
         # Extra fields
         self.extra_values = (

diff --git a/target_parquet/target.py b/target_parquet/target.py
@@ -54,6 +54,12 @@ class TargetParquet(Target):
             "It can control the memory usage of the target.",
             default=10000,
         ),
+        th.Property(
+            "max_flatten_level",
+            th.IntegerType,
+            description="Max level of nesting to flatten",
+            default=100,
+        ),
         th.Property(
             "extra_fields",
             th.StringType,

diff --git a/tests/utils/test_parquet.py b/tests/utils/test_parquet.py
@@ -66,6 +66,42 @@ def test_flatten_schema_to_pyarrow_schema():
     assert pyarrow_schema == expected_pyarrow_schema
 
 
+def test_no_flatten_schema_to_pyarrow():  # flatten_schema with max_level=0, then conversion to a pyarrow schema
+    schema = {
+        "type": "object",
+        "properties": {
+            "str": {"type": ["null", "string"]},
+            "int": {"type": ["null", "integer"]},
+            "decimal": {"type": ["null", "number"]},
+            "nested": {  # object property with a further object nested inside it
+                "type": "object",
+                "properties": {
+                    "nested_str": {"type": ["null", "string"]},
+                    "nested_int": {"type": ["null", "integer"]},
+                    "deep_nested": {
+                        "type": "object",
+                        "properties": {
+                            "deep_str": {"type": ["null", "string"]},
+                        },
+                    },
+                },
+            },
+        },
+    }
+
+    flatten_schema_result = flatten_schema(schema, max_level=0)  # max_level=0: "nested" is expected to stay unexpanded
+    pyarrow_schema = flatten_schema_to_pyarrow_schema(flatten_schema_result)
+    expected_pyarrow_schema = pa.schema(
+        [
+            pa.field("str", pa.string()),
+            pa.field("int", pa.int64()),
+            pa.field("decimal", pa.float64()),
+            pa.field("nested", pa.string()),  # whole nested object expected as one string column; no nested_* fields
+        ]
+    )
+
+    assert pyarrow_schema == expected_pyarrow_schema
+
 @pytest.mark.parametrize(
     "field_name, input_types, expected_result",
     [