From b0a8b3f2244f7047a88fbda9073fbd0347a1ce4c Mon Sep 17 00:00:00 2001 From: Lingbo Liu Date: Thu, 13 Feb 2025 15:02:44 -0500 Subject: [PATCH 1/9] update all 4 IO nodes 1 allow choosing file on local drive and using URL 2 allow encoding parameter 3 add option to create missing folder while saving files --- knime_extension/src/nodes/io.py | 182 ++++++++++++++++++++++++++------ 1 file changed, 152 insertions(+), 30 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 755d5c9f..7b7e9281 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -46,6 +46,50 @@ class ExistingFile(knext.EnumParameterOptions): ) +def validate_path(path: str) -> None: + # no path check + pass + + +class _EncodingOptions(knext.EnumParameterOptions): + AUTO = ( + "Auto", + "Automatically detect the encoding from common options", + ) + UTF8 = ( + "UTF-8", + "Unicode Transformation Format - 8 bit. Default encoding suitable for most modern GIS data files.", + ) + GB18030 = ( + "GB18030", + "Chinese National Standard encoding. More comprehensive than GBK.", + ) + GBK = ( + "GBK", + "Chinese internal code specification. Common in Chinese GIS software.", + ) + GB2312 = ( + "GB2312", + "Basic Simplified Chinese character encoding.", + ) + LATIN1 = ( + "ISO-8859-1", + "Latin-1 encoding. Suitable for Western European languages.", + ) + WINDOWS1252 = ( + "Windows-1252", + "Windows Western European encoding. Common in Windows systems.", + ) + ASCII = ( + "ASCII", + "Basic ASCII encoding. 
Only for standard ASCII characters.", + ) + + @classmethod + def get_default(cls): + return cls.AUTO + + ############################################ # GeoFile Reader ############################################ @@ -87,10 +131,20 @@ class ExistingFile(knext.EnumParameterOptions): }, ) class GeoFileReaderNode: - data_url = knext.StringParameter( + data_url = knext.LocalPathParameter( "Input file path", - "The file path for reading data.", - "", + "Select the file path for reading data.", + placeholder_text="Select input file path...", + validator=validate_path, + ) + + encoding = knext.EnumParameter( + label="Encoding", + description="Select the encoding for reading the data file.", + default_value=_EncodingOptions.get_default().name, + enum=_EncodingOptions, + since_version="1.3.0", + is_advanced=True, ) def configure(self, configure_context): @@ -132,7 +186,10 @@ def execute(self, exec_context: knext.ExecutionContext): ): gdf = gp.read_parquet(self.data_url) else: - gdf = gp.read_file(self.data_url) + if self.encoding == _EncodingOptions.AUTO.name: + gdf = gp.read_file(self.data_url) + else: + gdf = gp.read_file(self.data_url, encoding=self.encoding) if "" in gdf.columns: gdf = gdf.drop(columns="") @@ -144,6 +201,8 @@ def execute(self, exec_context: knext.ExecutionContext): ############################################ # GeoFile Writer ############################################ + + @knext.node( name="GeoFile Writer", node_type=knext.NodeType.SINK, @@ -171,6 +230,7 @@ def execute(self, exec_context: knext.ExecutionContext): }, ) class GeoFileWriterNode: + geo_col = knext.ColumnParameter( "Geometry column", "Select the geometry column for Geodata.", @@ -180,11 +240,11 @@ class GeoFileWriterNode: include_none_column=False, ) - data_url = knext.StringParameter( + data_url = knext.LocalPathParameter( "Output file path", - """The file path for writing data. The file extension e.g. 
*.shp*, *.geojson*, or *.parquet* is appended -automatically depending on the selected file format if not specified.""", - "", + "Select the file path for saving data.", + placeholder_text="Select output file path...", + validator=validate_path, ) existing_file = knext.EnumParameter( @@ -214,6 +274,15 @@ class GeoFileWriterNode: since_version="1.2.0", ).rule(knext.OneOf(dataformat, ["GeoParquet"]), knext.Effect.SHOW) + encoding = knext.EnumParameter( + label="Encoding", + description="Select the encoding for reading the data file.", + default_value=_EncodingOptions.get_default().name, + enum=_EncodingOptions, + since_version="1.3.0", + is_advanced=True, + ) + def configure(self, configure_context, input_schema): self.geo_col = knut.column_exists_or_preset( configure_context, self.geo_col, input_schema, knut.is_geo @@ -225,6 +294,12 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): 0.4, "Writing file (This might take a while without progress changes)" ) + import os + + output_dir = os.path.dirname(self.data_url) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col) if "" in gdf.columns: gdf = gdf.drop(columns="") @@ -233,7 +308,11 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): if self.dataformat == "Shapefile": fileurl = knut.ensure_file_extension(self.data_url, ".shp") self.__check_overwrite(fileurl) - gdf.to_file(fileurl) + if self.encoding == _EncodingOptions.AUTO.name: + gdf.to_file(fileurl) + else: + gdf.to_file(fileurl, encoding=self.encoding) + elif self.dataformat == "GeoParquet": if self.parquet_compression == Compression.NONE.name: file_extension = ".parquet" @@ -253,7 +332,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): else: fileurl = knut.ensure_file_extension(self.data_url, ".geojson") self.__check_overwrite(fileurl) - gdf.to_file(fileurl, driver="GeoJSON") + if self.encoding == 
_EncodingOptions.AUTO.name: + gdf.to_file(fileurl) + else: + gdf.to_file(fileurl, driver="GeoJSON", encoding=self.encoding) return None def __check_overwrite(self, fileurl): @@ -303,10 +385,11 @@ def __check_overwrite(self, fileurl): }, ) class GeoPackageReaderNode: - data_url = knext.StringParameter( + data_url = knext.LocalPathParameter( "Input file path", - "The file path for reading data.", - "", + "Select the file path for reading data.", + placeholder_text="Select input file path...", + validator=validate_path, ) data_layer = knext.StringParameter( @@ -315,6 +398,15 @@ class GeoPackageReaderNode: "", ) + encoding = knext.EnumParameter( + label="Encoding", + description="Select the encoding for reading the data file.", + default_value=_EncodingOptions.get_default().name, + enum=_EncodingOptions, + since_version="1.3.0", + is_advanced=True, + ) + def configure(self, configure_context): # TODO Create combined schema return None @@ -327,27 +419,37 @@ def execute(self, exec_context: knext.ExecutionContext): import pandas as pd layerlist = fiona.listlayers(self.data_url) - pnumber = pd.Series(range(0, 100)).astype(str).to_list() - if self.data_layer in layerlist: - src = fiona.open(self.data_url, layer=self.data_layer) - elif self.data_layer in pnumber: - nlayer = int(self.data_layer) - src = fiona.open(self.data_url, layer=nlayer) - else: - src = fiona.open(self.data_url, layer=0) + layer = self._get_layer(layerlist) + + open_params = {"path": self.data_url, "layer": layer} + if self.encoding != _EncodingOptions.AUTO.name: + open_params["encoding"] = self.encoding + + src = fiona.open(**open_params) gdf = gp.GeoDataFrame.from_features(src) + try: gdf.crs = src.crs except: print("Invalid CRS") - gdf = gdf.reset_index(drop=True) - if "" in gdf.columns: - gdf = gdf.drop(columns="") - if "" in gdf.columns: - gdf = gdf.drop(columns="") + + gdf = self._clean_dataframe(gdf) + listtable = pd.DataFrame({"layerlist": layerlist}) return knext.Table.from_pandas(gdf), 
knext.Table.from_pandas(listtable) + def _get_layer(self, layerlist): + if self.data_layer in layerlist: + return self.data_layer + elif self.data_layer.isdigit() and 0 <= int(self.data_layer) < 100: + return int(self.data_layer) + return 0 + + def _clean_dataframe(self, df): + df = df.reset_index(drop=True) + columns_to_drop = ["", ""] + return df.drop(columns=[col for col in columns_to_drop if col in df.columns]) + ############################################ # GeoPackage Writer @@ -387,10 +489,11 @@ class GeoPackageWriterNode: include_none_column=False, ) - data_url = knext.StringParameter( + data_url = knext.LocalPathParameter( "Output file path", - "The file path for saving data.", - "", + "Select the file path for saving data.", + placeholder_text="Select output file path...", + validator=validate_path, ) data_layer = knext.StringParameter( @@ -399,6 +502,15 @@ class GeoPackageWriterNode: "new", ) + encoding = knext.EnumParameter( + label="Encoding", + description="Select the encoding for reading the data file.", + default_value=_EncodingOptions.get_default().name, + enum=_EncodingOptions, + since_version="1.3.0", + is_advanced=True, + ) + def configure(self, configure_context, input_schema): self.geo_col = knut.column_exists_or_preset( configure_context, self.geo_col, input_schema, knut.is_geo @@ -409,6 +521,12 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): exec_context.set_progress( 0.4, "Writing file (This might take a while without progress changes)" ) + import os + + output_dir = os.path.dirname(self.data_url) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col) gdf = gdf.reset_index(drop=True) file_name = knut.ensure_file_extension(self.data_url, ".gpkg") @@ -423,5 +541,9 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): gdf = gdf.drop(columns="") if "" in gdf.columns: gdf = gdf.drop(columns="") - 
gdf.to_file(file_name, layer=self.data_layer, driver="GPKG") + if self.encoding == _EncodingOptions.AUTO.name: + gdf.to_file(file_name, layer=self.data_layer, driver="GPKG") + else: + gdf.to_file(file_name, layer=self.data_layer, driver="GPKG", encoding=self.encoding) + return None From 6454e6ff1ca1204b8125317a81a85babe7ba5f90 Mon Sep 17 00:00:00 2001 From: Lingbo Liu Date: Thu, 13 Feb 2025 15:56:45 -0500 Subject: [PATCH 2/9] add GML to GeoFile Writer node GML format is already readable in GeoFile Reader --- knime_extension/src/nodes/io.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 7b7e9281..44c4e823 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -185,6 +185,7 @@ def execute(self, exec_context: knext.ExecutionContext): or self.data_url.lower().endswith(".parquet.snappy") ): gdf = gp.read_parquet(self.data_url) + else: if self.encoding == _EncodingOptions.AUTO.name: gdf = gp.read_file(self.data_url) @@ -263,7 +264,7 @@ class GeoFileWriterNode: "Output file format", "The file format to use.", "Shapefile", - enum=["Shapefile", "GeoJSON", "GeoParquet"], + enum=["Shapefile", "GeoJSON", "GeoParquet", "GML"], ) parquet_compression = knext.EnumParameter( @@ -329,13 +330,20 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): fileurl = knut.ensure_file_extension(self.data_url, file_extension) self.__check_overwrite(fileurl) gdf.to_parquet(fileurl, compression=compression) - else: + elif self.dataformat == "GeoJSON": fileurl = knut.ensure_file_extension(self.data_url, ".geojson") self.__check_overwrite(fileurl) if self.encoding == _EncodingOptions.AUTO.name: gdf.to_file(fileurl) else: gdf.to_file(fileurl, driver="GeoJSON", encoding=self.encoding) + else: + fileurl = knut.ensure_file_extension(self.data_url, ".gml") + self.__check_overwrite(fileurl) + if self.encoding == _EncodingOptions.AUTO.name: + 
gdf.to_file(fileurl) + else: + gdf.to_file(fileurl, driver="GML", encoding=self.encoding) return None def __check_overwrite(self, fileurl): @@ -542,8 +550,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): if "" in gdf.columns: gdf = gdf.drop(columns="") if self.encoding == _EncodingOptions.AUTO.name: - gdf.to_file(file_name, layer=self.data_layer, driver="GPKG") - else: - gdf.to_file(file_name, layer=self.data_layer, driver="GPKG", encoding=self.encoding) + gdf.to_file(file_name, layer=self.data_layer, driver="GPKG") + else: + gdf.to_file( + file_name, layer=self.data_layer, driver="GPKG", encoding=self.encoding + ) return None From ad4911d2df8d55b74383cbd39cd3367152517daf Mon Sep 17 00:00:00 2001 From: Lingbo Liu Date: Mon, 17 Feb 2025 09:47:35 -0500 Subject: [PATCH 3/9] update ignore invalid geometry in GeoFile and Geopackage reader Solve issue by using pyogrio as engine Simplify the Geopackage Reader --- knime_extension/src/nodes/io.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 44c4e823..46b8dfa8 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -188,9 +188,14 @@ def execute(self, exec_context: knext.ExecutionContext): else: if self.encoding == _EncodingOptions.AUTO.name: - gdf = gp.read_file(self.data_url) + gdf = gp.read_file(self.data_url, engine="pyogrio", on_invalid="ignore") else: - gdf = gp.read_file(self.data_url, encoding=self.encoding) + gdf = gp.read_file( + self.data_url, + encoding=self.encoding, + engine="pyogrio", + on_invalid="ignore", + ) if "" in gdf.columns: gdf = gdf.drop(columns="") @@ -429,17 +434,18 @@ def execute(self, exec_context: knext.ExecutionContext): layerlist = fiona.listlayers(self.data_url) layer = self._get_layer(layerlist) - open_params = {"path": self.data_url, "layer": layer} - if self.encoding != _EncodingOptions.AUTO.name: - 
open_params["encoding"] = self.encoding - - src = fiona.open(**open_params) - gdf = gp.GeoDataFrame.from_features(src) - - try: - gdf.crs = src.crs - except: - print("Invalid CRS") + if self.encoding == _EncodingOptions.AUTO.name: + gdf = gp.read_file( + self.data_url, layer=layer, engine="pyogrio", on_invalid="ignore" + ) + else: + gdf = gp.read_file( + self.data_url, + layer=layer, + engine="pyogrio", + on_invalid="ignore", + encoding=self.encoding, + ) gdf = self._clean_dataframe(gdf) From 4fc2d1440439947f97452305beea8c8084e4c044 Mon Sep 17 00:00:00 2001 From: Lingbo Liu Date: Mon, 17 Feb 2025 16:38:09 -0500 Subject: [PATCH 4/9] unify clean and check functions, add existence check in GeoPackage --- knime_extension/src/nodes/io.py | 61 ++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 46b8dfa8..64d94a93 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -51,6 +51,18 @@ def validate_path(path: str) -> None: pass +def clean_dataframe(df): + df = df.reset_index(drop=True) + columns_to_drop = ["", ""] + return df.drop(columns=[col for col in columns_to_drop if col in df.columns]) + + +def check_overwrite(fileurl, existing_file): + if existing_file == ExistingFile.FAIL.name: + if os.path.exists(fileurl): + raise knext.InvalidParametersError() + + class _EncodingOptions(knext.EnumParameterOptions): AUTO = ( "Auto", @@ -197,10 +209,7 @@ def execute(self, exec_context: knext.ExecutionContext): on_invalid="ignore", ) - if "" in gdf.columns: - gdf = gdf.drop(columns="") - if "" in gdf.columns: - gdf = gdf.drop(columns="") + gdf = clean_dataframe(gdf) return knext.Table.from_pandas(gdf) @@ -313,7 +322,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): gdf = gdf.drop(columns="") if self.dataformat == "Shapefile": fileurl = knut.ensure_file_extension(self.data_url, ".shp") - self.__check_overwrite(fileurl) + 
check_overwrite(fileurl, self.existing_file) if self.encoding == _EncodingOptions.AUTO.name: gdf.to_file(fileurl) else: @@ -333,33 +342,24 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): file_extension = ".parquet.snappy" compression = "snappy" fileurl = knut.ensure_file_extension(self.data_url, file_extension) - self.__check_overwrite(fileurl) + check_overwrite(fileurl, self.existing_file) gdf.to_parquet(fileurl, compression=compression) elif self.dataformat == "GeoJSON": fileurl = knut.ensure_file_extension(self.data_url, ".geojson") - self.__check_overwrite(fileurl) + check_overwrite(fileurl, self.existing_file) if self.encoding == _EncodingOptions.AUTO.name: gdf.to_file(fileurl) else: gdf.to_file(fileurl, driver="GeoJSON", encoding=self.encoding) else: fileurl = knut.ensure_file_extension(self.data_url, ".gml") - self.__check_overwrite(fileurl) + check_overwrite(fileurl, self.existing_file) if self.encoding == _EncodingOptions.AUTO.name: gdf.to_file(fileurl) else: gdf.to_file(fileurl, driver="GML", encoding=self.encoding) return None - def __check_overwrite(self, fileurl): - if self.existing_file == ExistingFile.FAIL.name: - import os.path - - if os.path.exists(fileurl): - raise knext.InvalidParametersError( - "File already exists and should not be overwritten." 
- ) - ############################################ # GeoPackage Reader @@ -447,7 +447,7 @@ def execute(self, exec_context: knext.ExecutionContext): encoding=self.encoding, ) - gdf = self._clean_dataframe(gdf) + gdf = clean_dataframe(gdf) listtable = pd.DataFrame({"layerlist": layerlist}) return knext.Table.from_pandas(gdf), knext.Table.from_pandas(listtable) @@ -459,11 +459,6 @@ def _get_layer(self, layerlist): return int(self.data_layer) return 0 - def _clean_dataframe(self, df): - df = df.reset_index(drop=True) - columns_to_drop = ["", ""] - return df.drop(columns=[col for col in columns_to_drop if col in df.columns]) - ############################################ # GeoPackage Writer @@ -525,6 +520,18 @@ class GeoPackageWriterNode: is_advanced=True, ) + existing_file = knext.EnumParameter( + "If exists:", + "Specify the behavior of the node in case the output file already exists.", + lambda v: ( + ExistingFile.OVERWRITE.name + if v < knext.Version(1, 3, 0) + else ExistingFile.FAIL.name + ), + enum=ExistingFile, + since_version="1.3.0", + ) + def configure(self, configure_context, input_schema): self.geo_col = knut.column_exists_or_preset( configure_context, self.geo_col, input_schema, knut.is_geo @@ -537,6 +544,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): ) import os + check_overwrite(self.data_url, self.existing_file) output_dir = os.path.dirname(self.data_url) if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True) @@ -551,10 +559,9 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): ).columns if len(time_columns) > 0: gdf[time_columns] = gdf[time_columns].astype(str) - if "" in gdf.columns: - gdf = gdf.drop(columns="") - if "" in gdf.columns: - gdf = gdf.drop(columns="") + + gdf = clean_dataframe(gdf) + if self.encoding == _EncodingOptions.AUTO.name: gdf.to_file(file_name, layer=self.data_layer, driver="GPKG") else: From 22802262735d568289c0630c5aefab8244b88c33 Mon Sep 17 00:00:00 2001 
From: Lingbo Liu Date: Mon, 17 Feb 2025 16:58:33 -0500 Subject: [PATCH 5/9] fix bug on check_overwrite function --- knime_extension/src/nodes/io.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 64d94a93..eb2fd35d 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -58,9 +58,11 @@ def clean_dataframe(df): def check_overwrite(fileurl, existing_file): + import os + if existing_file == ExistingFile.FAIL.name: if os.path.exists(fileurl): - raise knext.InvalidParametersError() + raise knext.InvalidParametersError("File already exists.") class _EncodingOptions(knext.EnumParameterOptions): From 82ae35450388cf66c7b1c196151cdb17dcbbb7e8 Mon Sep 17 00:00:00 2001 From: Tobias Koetter Date: Tue, 15 Apr 2025 11:47:14 +0200 Subject: [PATCH 6/9] Add pyogrio dependency --- knime_extension/geospatial_env.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/knime_extension/geospatial_env.yml b/knime_extension/geospatial_env.yml index a549339f..808036d5 100644 --- a/knime_extension/geospatial_env.yml +++ b/knime_extension/geospatial_env.yml @@ -24,6 +24,7 @@ dependencies: - numpy=1.23.5 #required to fix problem with latest version of numpy - osmnx=1.7.0 - polyline 2.0.0 + - pyogrio=0.10.0 - pyproj=3.5.0 - pysal=23.1 - rasterio=1.3.6 From 5bc27ab8e4cfcda4bcf168ef9b78f66fa8d64654 Mon Sep 17 00:00:00 2001 From: Tobias Koetter Date: Wed, 30 Apr 2025 14:38:47 +0200 Subject: [PATCH 7/9] Bump extension version to 1.4 in preparation of the release --- knime_extension/knime.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/knime_extension/knime.yml b/knime_extension/knime.yml index df1db527..86e2177e 100644 --- a/knime_extension/knime.yml +++ b/knime_extension/knime.yml @@ -10,5 +10,5 @@ extension_module: src/geospatial_ext # The .py Python module containing the node env_yml_path: geospatial_env.yml # This is necessary for bundling, but not 
needed during development feature_dependencies: - org.knime.features.geospatial 4.7.0 - - org.knime.features.core 5.2.0 #ensure that this extension can only be used with 5.2 with date time support + - org.knime.features.core 5.3.0 #ensure that this extension can only be used with 5.3 with file browsing support From c1f83778a3453645f3169c6e5d646a48c6d4db41 Mon Sep 17 00:00:00 2001 From: Tobias Koetter Date: Wed, 30 Apr 2025 14:39:14 +0200 Subject: [PATCH 8/9] Change since_version of the new parameters to 1.4 --- knime_extension/src/nodes/io.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index eb2fd35d..0ffe2b14 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -157,7 +157,7 @@ class GeoFileReaderNode: description="Select the encoding for reading the data file.", default_value=_EncodingOptions.get_default().name, enum=_EncodingOptions, - since_version="1.3.0", + since_version="1.4.0", is_advanced=True, ) @@ -296,7 +296,7 @@ class GeoFileWriterNode: description="Select the encoding for reading the data file.", default_value=_EncodingOptions.get_default().name, enum=_EncodingOptions, - since_version="1.3.0", + since_version="1.4.0", is_advanced=True, ) @@ -418,7 +418,7 @@ class GeoPackageReaderNode: description="Select the encoding for reading the data file.", default_value=_EncodingOptions.get_default().name, enum=_EncodingOptions, - since_version="1.3.0", + since_version="1.4.0", is_advanced=True, ) @@ -518,7 +518,7 @@ class GeoPackageWriterNode: description="Select the encoding for reading the data file.", default_value=_EncodingOptions.get_default().name, enum=_EncodingOptions, - since_version="1.3.0", + since_version="1.4.0", is_advanced=True, ) @@ -531,7 +531,7 @@ class GeoPackageWriterNode: else ExistingFile.FAIL.name ), enum=ExistingFile, - since_version="1.3.0", + since_version="1.4.0", ) def configure(self, configure_context, 
input_schema): From a04e2f321be58bee98ce6e664b61f19a52559e02 Mon Sep 17 00:00:00 2001 From: Tobias Koetter Date: Wed, 7 May 2025 17:11:46 +0200 Subject: [PATCH 9/9] Improve node description and some smaller code improvements --- knime_extension/src/nodes/io.py | 85 +++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 0ffe2b14..e01b19c0 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -52,17 +52,54 @@ def validate_path(path: str) -> None: def clean_dataframe(df): + """ + Cleans the given DataFrame by resetting its index and removing specific columns. + + This function resets the index of the DataFrame, dropping the old index, + and removes the columns "" and "" if they exist in the DataFrame. + + Args: + df (pandas.DataFrame): The input DataFrame to be cleaned. + + Returns: + pandas.DataFrame: A cleaned DataFrame with the index reset and specified columns removed. + """ df = df.reset_index(drop=True) columns_to_drop = ["", ""] return df.drop(columns=[col for col in columns_to_drop if col in df.columns]) def check_overwrite(fileurl, existing_file): + """ + Checks if a file already exists and raises an error if overwriting is not allowed. + Args: + fileurl (str): The path to the file to check. + existing_file (Enum): An enumeration value indicating the overwrite policy. + It should have a `FAIL` member to signify that overwriting is not allowed. + Raises: + knext.InvalidParametersError: If the file exists and the overwrite policy is set to FAIL. + """ + import os + + if existing_file == ExistingFile.FAIL.name and os.path.exists(fileurl): + raise knext.InvalidParametersError("File already exists.") + + +def check_outdir(fileurl): + """ + Ensures that the directory for the given file path exists. If the directory + does not exist, it is created. 
+ Args: + fileurl (str): The file path for which the directory should be checked + and created if necessary. + Raises: + OSError: If the directory cannot be created due to an operating system error. + """ import os - if existing_file == ExistingFile.FAIL.name: - if os.path.exists(fileurl): - raise knext.InvalidParametersError("File already exists.") + output_dir = os.path.dirname(fileurl) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) class _EncodingOptions(knext.EnumParameterOptions): @@ -131,8 +168,7 @@ def get_default(cls): For more details on the limitations when reading these files see [here.](https://gdal.org/drivers/vector/kml.html#kml-reading) -Examples of standard local file paths are *C:\\KNIMEworkspace\\test.geojson* for Windows and -*/KNIMEworkspace/test.shp* for Linux. The node can also load resources directly from a web URL, for example to +The node can load resources directly from a web URL, for example to load a GeoJSON file from [geojson.xyz](http://geojson.xyz/) you would enter *http://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_land.geojson*. @@ -147,8 +183,8 @@ def get_default(cls): class GeoFileReaderNode: data_url = knext.LocalPathParameter( "Input file path", - "Select the file path for reading data.", - placeholder_text="Select input file path...", + "Select the file path or directly enter a remote URL for reading the data.", + placeholder_text="Select input file path or enter URL...", validator=validate_path, ) @@ -235,8 +271,6 @@ def execute(self, exec_context: knext.ExecutionContext): short_description="Write single layer GeoFile.", description="""This node writes the data in the format of [Shapefile](https://en.wikipedia.org/wiki/Shapefile), [GeoJSON](https://geojson.org/), or [GeoParquet](https://github.com/opengeospatial/geoparquet). -Examples of standard local file paths are *C:\\KNIMEworkspace\\test.shp* for Windows and -*/KNIMEworkspace/test.geojson* for Linux. 
The file extension e.g. *.shp*, *.geojson*, or *.parquet* is appended automatically depending on the selected file format if not specified.""", @@ -293,7 +327,7 @@ class GeoFileWriterNode: encoding = knext.EnumParameter( label="Encoding", - description="Select the encoding for reading the data file.", + description="Select the encoding for saving the data file.", default_value=_EncodingOptions.get_default().name, enum=_EncodingOptions, since_version="1.4.0", @@ -311,17 +345,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): 0.4, "Writing file (This might take a while without progress changes)" ) - import os - - output_dir = os.path.dirname(self.data_url) - if output_dir and not os.path.exists(output_dir): - os.makedirs(output_dir, exist_ok=True) - + check_outdir(self.data_url) gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col) - if "" in gdf.columns: - gdf = gdf.drop(columns="") - if "" in gdf.columns: - gdf = gdf.drop(columns="") + gdf = clean_dataframe(gdf) + if self.dataformat == "Shapefile": fileurl = knut.ensure_file_extension(self.data_url, ".shp") check_overwrite(fileurl, self.existing_file) @@ -389,8 +416,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): You can also enter the number of the layer to read starting at 0. The node will output the names of all layers as second output table, which can be used to revise the name of the target layer. -Examples of standard local file paths are *C:\\KNIMEworkspace\\test.gpkg* for Windows and -*/KNIMEworkspace/test.gpkg* for Linux. The node can also load resources directly from a web URL. +The node can load resources directly from a web URL e.g. +*https://github.com/INSPIRE-MIF/gp-geopackage-encodings/raw/refs/heads/main/examples/GE-gpkg-template.gpkg*. **Note:** For larger files the node progress might not change for a time until the file is successfully read. 
""", @@ -402,8 +429,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): class GeoPackageReaderNode: data_url = knext.LocalPathParameter( "Input file path", - "Select the file path for reading data.", - placeholder_text="Select input file path...", + "Select the file path or directly enter a remote URL for reading the data.", + placeholder_text="Select input file path or enter URL...", validator=validate_path, ) @@ -480,8 +507,6 @@ def _get_layer(self, layerlist): short_description="Write GeoPackage layer.", description="""This node writes the data as new [Geopackage](https://www.geopackage.org/) file or as layer into an existing file. -Examples of standard local file paths are *C:\\KNIMEworkspace\\test.gpkg* for Windows and -*/KNIMEworkspace/test.gpkg* for Linux. **Note:** If file and layer already exist, the layer will be overwritten without a warning! """, @@ -515,7 +540,7 @@ class GeoPackageWriterNode: encoding = knext.EnumParameter( label="Encoding", - description="Select the encoding for reading the data file.", + description="Select the encoding for saving the data file.", default_value=_EncodingOptions.get_default().name, enum=_EncodingOptions, since_version="1.4.0", @@ -544,12 +569,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1): exec_context.set_progress( 0.4, "Writing file (This might take a while without progress changes)" ) - import os check_overwrite(self.data_url, self.existing_file) - output_dir = os.path.dirname(self.data_url) - if output_dir and not os.path.exists(output_dir): - os.makedirs(output_dir, exist_ok=True) + + check_outdir(self.data_url) gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col) gdf = gdf.reset_index(drop=True)