diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..81322db --- /dev/null +++ b/requirements.txt @@ -0,0 +1,36 @@ +tqdm +pandas +pillow +matplotlib +shapely +geopandas +rioxarray +rasterio +geoplot +pyarrow +fiona +pre-commit +Sphinx +sphinx_rtd_theme +pycocotools +funcy +argparse +scikit-learn +scikit-multilearn +netron +onnx +onnxruntime +opencv-python +roboflow +torch +torchvision +wandb +geojson +groundingdino-py +mercantile +numpy +osmtogeojson +overpass +overpy +requests +segment-geospatial diff --git a/scripts/batch_geojson2coco.py b/scripts/batch_geojson2coco.py index 944a50a..6e2f104 100644 --- a/scripts/batch_geojson2coco.py +++ b/scripts/batch_geojson2coco.py @@ -15,8 +15,6 @@ from pycocotools.coco import COCO from shapely.geometry import box from tqdm import tqdm -from pathlib import Path - from aigis.convert.coordinates import wkt_parser @@ -38,6 +36,7 @@ def format_string(s, length=23): format_specifier = "{:<" + str(length) + "." + str(length) + "}" return format_specifier.format("NA") + def resume(output_dir: str) -> list: """Resume a batch job from an output directory. @@ -64,6 +63,7 @@ def resume(output_dir: str) -> list: return processed + def crop_and_save_geojson( raster_dir: str, geojson_path: str, @@ -117,6 +117,11 @@ def crop_and_save_geojson( # Crop the GeoJSON to the extent of the raster cropped_geojson = geojson[geojson.geometry.intersects(bbox)] + # Drop id feild from the geojson properties if there are duplicates + if 'id' in cropped_geojson.columns and cropped_geojson['id'].duplicated().any(): + cropped_geojson = cropped_geojson.drop(columns=['id']) + + # Save the cropped GeoJSON with the same naming pattern cropped_geojson_filename = os.path.join( cropped_dir, os.path.basename(raster_file).split(".")[0] + ".geojson" @@ -124,10 +129,12 @@ def crop_and_save_geojson( if os.path.exists(cropped_geojson_filename) and not force_overwrite: continue else: - cropped_geojson.to_file(cropped_geojson_filename, driver="GeoJSON") + if not cropped_geojson.empty: + cropped_geojson.to_file(cropped_geojson_filename, driver="GeoJSON") return cropped_dir + def process_single(args): """The main script with a sinle threaded implementation.""" output_dir = args.output_dir @@ -231,211 +238,6 @@ def process_single(args): return individual_coco_datasets -def process_vector_dir(args): - """ - Check and process the vector directory. - - Args: - - args: Command-line arguments. - - This function checks if the provided vector directory exists. If the directory is not found and the provided file is a GeoJSON file, it crops the GeoJSON file to the extent of the raster file specified. - - Returns: - - args: Updated command-line arguments. - """ - - # Check the vector-dir, and if it is not a dir, and is a single geojson file, then crop it to the extent of the raster file - if not os.path.isdir(args.vector_dir): - if args.vector_dir.endswith(".geojson"): - logger.info( - "The vector-dir is not a directory, and is a geojson file. Cropping it to the extent of the raster file." - ) - args.vector_dir = crop_and_save_geojson( - args.raster_dir, - args.vector_dir, - raster_extension=".tif", - user_crs=args.user_assumed_raster_crs, - force_overwrite=args.force_overwrite, - ) - else: - raise ValueError( - "The vector-dir is not a directory, and is not a geojson file. Please provide a directory or a geojson file." 
- ) - - return args - -def print_individual_coco_datasets(individual_coco_datasets): - """Print markdown output for individual COCO datasets.""" - print("Running geojson2coco.py over raster and vector pairs:") - print() - print( - "| Raster File | Vector File | JSON File |" - ) - print( - "| ----------------------- | ----------------------- | ----------------------- |" - ) - for coco_file in individual_coco_datasets: - pair_dir = os.path.dirname(coco_file) - raster_file = os.path.basename(pair_dir) + ".tif" - vector_file = os.path.basename(pair_dir) + ".geojson" - print( - f"| {format_string(raster_file,23)} | {format_string(vector_file,23)} | {format_string(coco_file,23)} |" - ) - -def concatenate_datasets(individual_coco_datasets: list[Path], args) -> None: - concatenated_coco = COCO() # Create a new COCO dataset - concatenated_coco.dataset = { - "images": [], - "annotations": [], - "categories": [], - "licenses": [], - "info": {}, - } - - # Fix the category ids in annotations and categories blocks - category_index_checkpoint = 0 - image_index_checkpoint = 0 - annot_index_checkpoint = 0 - for coco_file in tqdm(individual_coco_datasets): - image_index_map = {} - category_index_map = {} - - try: - with open(coco_file, "r") as f: - dataset = json.load(f) - except FileNotFoundError: - print(f"Error: {coco_file} not found.") - continue - - pair_dir = os.path.dirname(coco_file) - raster_name = os.path.basename(pair_dir) - - for image_no, _ in enumerate(dataset["images"]): - dataset["images"][image_no]["file_name"] = os.path.join( - raster_name, dataset["images"][image_no]["file_name"] - ) - - image_index_map[ - dataset["images"][image_no]["id"] - ] = image_index_checkpoint - - dataset["images"][image_no]["id"] = image_index_checkpoint - image_index_checkpoint += 1 - - for _, dataset_category in enumerate(dataset["categories"]): - old_id = dataset_category["id"] - - if dataset_category["name"] not in [ - category["name"] - for category in concatenated_coco.dataset["categories"] - ]: - dataset_category["id"] = category_index_checkpoint - concatenated_coco.dataset["categories"].append(dataset_category) - category_index_map[old_id] = category_index_checkpoint - category_index_checkpoint += 1 - - else: - # find the existing mapping id - existing_mapping_id = None - for category in concatenated_coco.dataset["categories"]: - if category["name"] == dataset_category["name"]: - existing_mapping_id = category["id"] - break - dataset_category["id"] = existing_mapping_id - category_index_map[old_id] = existing_mapping_id - - for annotation_no, _ in enumerate(dataset["annotations"]): - annotation_image_id = dataset["annotations"][annotation_no]["image_id"] - dataset["annotations"][annotation_no]["image_id"] = image_index_map[ - annotation_image_id - ] - dataset["annotations"][annotation_no]["id"] = annot_index_checkpoint - - # make the segnmets list of lists if not already - if not isinstance( - dataset["annotations"][annotation_no]["segmentation"][0], list - ): - dataset["annotations"][annotation_no]["segmentation"] = [ - dataset["annotations"][annotation_no]["segmentation"] - ] - - # fix the annotation category id by the category_index_map - dataset["annotations"][annotation_no][ - "category_id" - ] = category_index_map[ - dataset["annotations"][annotation_no]["category_id"] - ] - - annot_index_checkpoint += 1 - - # Add the dataset to the concatenated COCO dataset - concatenated_coco.dataset["images"].extend(dataset["images"]) - concatenated_coco.dataset["annotations"].extend(dataset["annotations"]) - - # 
Add the categories to the concatenated COCO dataset if dataset["categories"]["id"] are not already in the concatenated_coco.dataset["categories"]["id"] - for category in dataset["categories"]: - if category["id"] not in [ - category["id"] - for category in concatenated_coco.dataset["categories"] - ]: - concatenated_coco.dataset["categories"].append(category) - try: - concatenated_coco.dataset["licenses"].extend(dataset["licenses"]) - except KeyError: - pass - - try: - concatenated_coco.dataset["info"] = dataset["info"] - except KeyError: - pass - - try: - concatenated_coco.dataset["type"] = dataset["type"] - except KeyError: - pass - - # Specify the output directory for the concatenated dataset - concatenated_output_dir = os.path.join(args.output_dir, "concatenated") - os.makedirs(concatenated_output_dir, exist_ok=True) - - # Save the concatenated COCO dataset - concatenated_json_file = os.path.join( - concatenated_output_dir, "concatenated_coco.json" - ) - with open(concatenated_json_file, "w") as f: - json.dump(concatenated_coco.dataset, f, indent=2) - - print(f"\nConcatenated COCO dataset saved to: {concatenated_json_file}") - - # Add roboflow compatible JSON and png files in a single directory named Roboflow - if args.roboflow_compatible: - roboflow_output_dir = os.path.join(args.output_dir, "Roboflow") - os.makedirs(roboflow_output_dir, exist_ok=True) - - # Save the concatenated COCO dataset as roboflow compatible JSON - roboflow_json_file = os.path.join(roboflow_output_dir, "concatenated.json") - with open(roboflow_json_file, "w") as f: - json.dump(concatenated_coco.dataset, f, indent=2) - - print(f"Roboflow compatible JSON saved to: {roboflow_json_file}") - - # Open the json file as a text file, then replace all image paths with the updated paths (/tile_ -> _tile_), then save it - with open(roboflow_json_file, "r") as f: - roboflow_json = f.read() - roboflow_json = roboflow_json.replace("/tile_", "_tile_") - with open(roboflow_json_file, "w") as f: - f.write(roboflow_json) - - # Copy all png files in the subdirectories to the roboflow_output_dir - in_pattern = roboflow_json_file.replace("concatenated.json", "/**/*.png") - files = glob.glob(in_pattern) - - # Copy files to the out_dir and rename the file to the name of the directory it was in+the file name - for file in tqdm(files): - # print(file) - os.system( - f"cp {file} {os.path.join(roboflow_output_dir,os.path.basename(os.path.dirname(file)))}_{os.path.basename(file)}" - ) def parse_arguments(args): parser = argparse.ArgumentParser( @@ -507,6 +309,7 @@ def parse_arguments(args): return parser.parse_args(args) + def main(args=None): """Convert raster and vector pairs to COCO JSON format. @@ -522,23 +325,204 @@ def main(args=None): """ args = parse_arguments(args) - + # Check the vector-dir, and if it is not a dir, and is a single geojson file, then crop it to the extent of the raster file - args = process_vector_dir(args) + if not os.path.isdir(args.vector_dir): + if args.vector_dir.endswith(".geojson"): + logger.info( + "The vector-dir is not a directory, and is a geojson file. Cropping it to the extent of the raster file." + ) + args.vector_dir = crop_and_save_geojson( + args.raster_dir, + args.vector_dir, + raster_extension=".tif", + user_crs=args.user_assumed_raster_crs, + force_overwrite=args.force_overwrite, + ) + else: + raise ValueError( + "The vector-dir is not a directory, and is not a geojson file. Please provide a directory or a geojson file." 
+ ) - # Specify the output directory + # Specify the output directory if args.no_workers > 1: raise NotImplementedError("Parallel processing not implemented yet.") else: print("Running geojson2coco.py over raster and vector pairs:") individual_coco_datasets = process_single(args) - # # Generate markdown output for individual COCO datasets - print_individual_coco_datasets(individual_coco_datasets) + # Generate markdown output for individual COCO datasets + print("Running geojson2coco.py over raster and vector pairs:") + print() + print( + "| Raster File | Vector File | JSON File |" + ) + print( + "| ----------------------- | ----------------------- | ----------------------- |" + ) + for coco_file in individual_coco_datasets: + pair_dir = os.path.dirname(coco_file) + raster_file = os.path.basename(pair_dir) + ".tif" + vector_file = os.path.basename(pair_dir) + ".geojson" + print( + f"| {format_string(raster_file,23)} | {format_string(vector_file,23)} | {format_string(coco_file,23)} |" + ) + # Concatenate COCO datasets if the --concatenate argument is enabled if args.concatenate: - # Concatenate COCO datasets if the --concatenate argument is enabled - concatenate_datasets(individual_coco_datasets, args) + concatenated_coco = COCO() # Create a new COCO dataset + concatenated_coco.dataset = { + "images": [], + "annotations": [], + "categories": [], + "licenses": [], + "info": {}, + } + + # Fix the category ids in annotations and categories blocks + category_index_checkpoint = 0 + image_index_checkpoint = 0 + annot_index_checkpoint = 0 + for coco_file in tqdm(individual_coco_datasets): + image_index_map = {} + category_index_map = {} + + try: + with open(coco_file, "r") as f: + dataset = json.load(f) + except FileNotFoundError: + print(f"Error: {coco_file} not found.") + continue + + pair_dir = os.path.dirname(coco_file) + raster_name = os.path.basename(pair_dir) + + for image_no, _ in enumerate(dataset["images"]): + dataset["images"][image_no]["file_name"] = os.path.join( + raster_name, dataset["images"][image_no]["file_name"] + ) + + image_index_map[ + dataset["images"][image_no]["id"] + ] = image_index_checkpoint + + dataset["images"][image_no]["id"] = image_index_checkpoint + image_index_checkpoint += 1 + + for _, dataset_category in enumerate(dataset["categories"]): + old_id = dataset_category["id"] + + if dataset_category["name"] not in [ + category["name"] + for category in concatenated_coco.dataset["categories"] + ]: + dataset_category["id"] = category_index_checkpoint + concatenated_coco.dataset["categories"].append(dataset_category) + category_index_map[old_id] = category_index_checkpoint + category_index_checkpoint += 1 + + else: + # find the existing mapping id + existing_mapping_id = None + for category in concatenated_coco.dataset["categories"]: + if category["name"] == dataset_category["name"]: + existing_mapping_id = category["id"] + break + dataset_category["id"] = existing_mapping_id + category_index_map[old_id] = existing_mapping_id + + for annotation_no, _ in enumerate(dataset["annotations"]): + annotation_image_id = dataset["annotations"][annotation_no]["image_id"] + dataset["annotations"][annotation_no]["image_id"] = image_index_map[ + annotation_image_id + ] + dataset["annotations"][annotation_no]["id"] = annot_index_checkpoint + + # make the segnmets list of lists if not already + if not isinstance( + dataset["annotations"][annotation_no]["segmentation"][0], list + ): + dataset["annotations"][annotation_no]["segmentation"] = [ + 
dataset["annotations"][annotation_no]["segmentation"] + ] + + # fix the annotation category id by the category_index_map + dataset["annotations"][annotation_no][ + "category_id" + ] = category_index_map[ + dataset["annotations"][annotation_no]["category_id"] + ] + + annot_index_checkpoint += 1 + + # Add the dataset to the concatenated COCO dataset + concatenated_coco.dataset["images"].extend(dataset["images"]) + concatenated_coco.dataset["annotations"].extend(dataset["annotations"]) + + # Add the categories to the concatenated COCO dataset if dataset["categories"]["id"] are not already in the concatenated_coco.dataset["categories"]["id"] + for category in dataset["categories"]: + if category["id"] not in [ + category["id"] + for category in concatenated_coco.dataset["categories"] + ]: + concatenated_coco.dataset["categories"].append(category) + try: + concatenated_coco.dataset["licenses"].extend(dataset["licenses"]) + except KeyError: + pass + + try: + concatenated_coco.dataset["info"] = dataset["info"] + except KeyError: + pass + + try: + concatenated_coco.dataset["type"] = dataset["type"] + except KeyError: + pass + + # Specify the output directory for the concatenated dataset + concatenated_output_dir = os.path.join(args.output_dir, "concatenated") + os.makedirs(concatenated_output_dir, exist_ok=True) + + # Save the concatenated COCO dataset + concatenated_json_file = os.path.join( + concatenated_output_dir, "concatenated_coco.json" + ) + with open(concatenated_json_file, "w") as f: + json.dump(concatenated_coco.dataset, f, indent=2) + + print(f"\nConcatenated COCO dataset saved to: {concatenated_json_file}") + + # Add roboflow compatible JSON and png files in a single directory named Roboflow + if args.roboflow_compatible: + roboflow_output_dir = os.path.join(args.output_dir, "Roboflow") + os.makedirs(roboflow_output_dir, exist_ok=True) + + # Save the concatenated COCO dataset as roboflow compatible JSON + roboflow_json_file = os.path.join(roboflow_output_dir, "concatenated.json") + with open(roboflow_json_file, "w") as f: + json.dump(concatenated_coco.dataset, f, indent=2) + + print(f"Roboflow compatible JSON saved to: {roboflow_json_file}") + + # Open the json file as a text file, then replace all image paths with the updated paths (/tile_ -> _tile_), then save it + with open(roboflow_json_file, "r") as f: + roboflow_json = f.read() + roboflow_json = roboflow_json.replace("/tile_", "_tile_") + with open(roboflow_json_file, "w") as f: + f.write(roboflow_json) + + # Copy all png files in the subdirectories to the roboflow_output_dir + in_pattern = roboflow_json_file.replace("concatenated.json", "/**/*.png") + files = glob.glob(in_pattern) + + # Copy files to the out_dir and rename the file to the name of the directory it was in+the file name + for file in tqdm(files): + # print(file) + os.system( + f"cp {file} {os.path.join(roboflow_output_dir,os.path.basename(os.path.dirname(file)))}_{os.path.basename(file)}" + ) if __name__ == "__main__": diff --git a/scripts/coco2geojson.py b/scripts/coco2geojson.py index 194857f..419e36c 100644 --- a/scripts/coco2geojson.py +++ b/scripts/coco2geojson.py @@ -14,7 +14,7 @@ import geopandas as gpd import pandas as pd -from shapely.geometry import Polygon +from shapely.geometry import MultiPolygon, Polygon from shapely.ops import unary_union from tqdm import tqdm @@ -38,6 +38,53 @@ tqdm.pandas() +def convert_multipolygon_to_polygons(geometry): + """Convert a MultiPolygon to a list of Polygons. 
+ + Args: + geometry (shapely.geometry.MultiPolygon): MultiPolygon to convert + + Returns: + list: List of Polygons + """ + + if isinstance(geometry, MultiPolygon): + return list(geometry.geoms) + else: + return [geometry] + + +def multipolygon_to_polygons(gdf): + """Convert a GeoDataFrame with MultiPolygons to a GeoDataFrame with + Polygons. + + Args: + gdf (GeoDataFrame): GeoDataFrame with MultiPolygons + + Returns: + GeoDataFrame: GeoDataFrame with Polygons + """ + # Create an empty list to hold the converted geometries + new_geometries = [] + + # Iterate over each row in the GeoDataFrame + for geometry in tqdm(gdf.geometry, total=gdf.shape[0]): + # If the geometry is a MultiPolygon + if isinstance(geometry, MultiPolygon): + # Convert each part of the MultiPolygon into a separate Polygon + for polygon in geometry: + new_geometries.append(polygon) + else: + # If it's not a MultiPolygon, just append the original geometry + new_geometries.append(geometry) + + # Create a new GeoDataFrame with the converted geometries + new_gdf = gpd.GeoDataFrame(geometry=new_geometries) + new_gdf.crs = gdf.crs + + return new_gdf + + def merge_class_polygons_geopandas(tiles_df_zone_groups, crs, keep_geom_type): """Merge overlapping polygons in each class/zone. @@ -114,12 +161,11 @@ def merge_class_polygons_shapely(tiles_df_zone_groups, crs): polygons_df (GeoDataFrame): GeoDataFrame of merged polygons """ print( - "Using the unary_union method to merge overlapping polygons in each class/zone." + "Using the unary_union method to merge overlapping polygons in each class/zone.\n" ) # polygons_df_zone_groups = [] - for index, tiles_df_zone in tqdm( - enumerate(tiles_df_zone_groups), total=len(tiles_df_zone_groups) - ): + for index, tiles_df_zone in enumerate(tiles_df_zone_groups): + print(f"Processing zone {index+1} of {len(tiles_df_zone_groups)}") tiles_df_zone = tiles_df_zone.reset_index(drop=True) # Convert segmentations to polygons @@ -130,10 +176,23 @@ def merge_class_polygons_shapely(tiles_df_zone_groups, crs): axis=1, ) + # Validate geometries + valid_geometries = [] + for geom in tqdm(tiles_df_zone["geometry"], total=tiles_df_zone.shape[0]): + # Check if geometry is valid + if not geom.is_valid: + print(f"Invalid geometry found at index {index}: {geom}") + # Attempt to fix the invalid geometry + geom = geom.buffer(0) + if not geom.is_valid: + print(f"Unable to fix geometry at index {index}, skipping") + continue + valid_geometries.append(geom) + # Merge overlapping polygons in each class/zone zone_name = tiles_df_zone["zone_name"][0] zone_code = tiles_df_zone["zone_code"][0] - multipolygon = unary_union(tiles_df_zone["geometry"]) + multipolygon = unary_union(valid_geometries) # polygons = list(multipolygon.geoms) if index == 0: @@ -171,13 +230,16 @@ def shape_regulariser( shapely.geometry.Polygon: Regularised polygon """ polygon_point_tuples = list(polygon.exterior.coords) - polygon = polygon_prep( - polygon_point_tuples, - simplify_tolerance, - minimum_rotated_rectangle, - orthogonalisation, - ) - polygon = Polygon(polygon) + try: + polygon = polygon_prep( + polygon_point_tuples, + simplify_tolerance, + minimum_rotated_rectangle, + orthogonalisation, + ) + polygon = Polygon(polygon) + except Exception as e: + log.error(f"Could not regularise the polygon. 
Error message: {e}") return polygon @@ -187,17 +249,16 @@ def shape_regulariser( def main(args=None): """Command-line driver.""" - test_data_path = "/home/sahand/Data/GIS2COCO/chatswood/big_tiles_200_b/" + # test_data_path = "/home/sahand/Data/GIS2COCO/chatswood/big_tiles_200_b/" ap = argparse.ArgumentParser(description=__doc__) ap.add_argument( "tiledir", type=Path, - default=test_data_path, help="Path to the input tiles directory with rasters. PNG files are not required.", ) ap.add_argument( "cocojson", - default=os.path.join(test_data_path, "coco-out-tol_0.4-b.json"), + # default=os.path.join(test_data_path, "coco-out-tol_0.4-b.json"), type=Path, help="Path to the input coco json file.", ) @@ -210,14 +271,17 @@ def main(args=None): ap.add_argument( "--geojson-output", "-o", - # required=True, - default=os.path.join( - test_data_path, - f"coco_2_geojson_{datetime.today().strftime('%Y-%m-%d')}.geojson", - ), + default=None, type=Path, help="Path to output geojson file.", ) + ap.add_argument( + "--geoparquet-output", + "-p", + default=None, + type=Path, + help="Path to output geoparquet file.", + ) ap.add_argument( "--tile-search-margin", default=0, @@ -280,6 +344,7 @@ def main(args=None): Read tiles and COCO JSON, and convert to GeoJSON. """ geojson_path = args.geojson_output + geopardquet_path = args.geoparquet_output tile_dir = args.tiledir meta_name = args.meta_name coco_json_path = args.cocojson @@ -288,6 +353,23 @@ def main(args=None): minimum_rotated_rectangle = args.minimum_rotated_rectangle orthogonalisation = args.orthogonalisation + if geojson_path is None and geopardquet_path is None: + geojson_path = os.path.join( + f"coco_2_geojson_{datetime.today().strftime('%Y-%m-%d')}.geojson", + ) + + print("Arguments:") + print(f"> Reading tiles from {tile_dir}") + print(f"> Reading COCO JSON from {coco_json_path}") + print(f"> Simplify tolerance (float): {float(simplify_tolerance)}") + print(f"> Simplify tolerance: {simplify_tolerance}") + print(f"> Minimum rotated rectangle: {minimum_rotated_rectangle}") + print(f"> Orthogonalisation: {orthogonalisation}") + print(f"> Writing Geoparquet to: {geopardquet_path}") + print(f"> Writing GeoJSON to {geojson_path}") + + simplify_tolerance = float(simplify_tolerance) + # keep_geom_type = ( # not args.not_keep_geom_type # ) # should be True # only meaningful when using geopandas overlay method @@ -338,10 +420,47 @@ def main(args=None): polygons_df = merge_class_polygons_shapely(tiles_df_zone_groups, crs) # polygons_df = merge_class_polygons_geopandas(tiles_df_zone_groups,crs,keep_geom_type) # geopandas overlay method -- slow - # change crs of the gpd and its geometries to "epsg:4326" + # Save to geojson before orthogonalisation + + if ( + simplify_tolerance > 0 + or minimum_rotated_rectangle is True + or orthogonalisation is True + ): + try: + polygons_df.to_file( + geojson_path.replace(".gejson", "-presimplification.geojson"), + driver="GeoJSON", + ) + except Exception as e: + log.error(f"Could not save the raw file to geojson. Error message: {e}") + + if ( + simplify_tolerance > 0 + or minimum_rotated_rectangle is True + or orthogonalisation is True + ): + try: + polygons_df.to_parquet( + geopardquet_path.replace(".geoparquet", "-presimplification.geoparquet") + ) + except Exception as e: + log.error(f"Could not save the raw file to geoparquet. 
Error message: {e}") + + # change crs of the gpd and its geometries to "epsg:4326" temporarily original_crs = polygons_df.crs polygons_df = polygons_df.to_crs("epsg:4326") + # Change multipolygons to polygons + print("Converting MultiPolygons to Polygons.") + # polygons_df["geometry"] = ( + # polygons_df["geometry"] + # .apply(lambda geom: convert_multipolygon_to_polygons(geom)) + # .explode() + # .reset_index(drop=True) + # ) + polygons_df = multipolygon_to_polygons(polygons_df) + print("Regularising the shape of the polygons.") # print(polygons_df) polygons_df["geometry"] = polygons_df["geometry"].progress_apply( @@ -359,8 +478,12 @@ def main(args=None): except Exception as e: log.error(f"Could not set Name property of geojson. Error message: {e}") print("FIX this code!") + # Save to geojson - polygons_df.to_file(geojson_path, driver="GeoJSON") + if geojson_path is not None: + polygons_df.to_file(geojson_path, driver="GeoJSON") + if geopardquet_path is not None: + polygons_df.to_parquet(geopardquet_path) if __name__ == "__main__": diff --git a/scripts/geojson2coco.py b/scripts/geojson2coco.py index 5a77084..4cfce63 100644 --- a/scripts/geojson2coco.py +++ b/scripts/geojson2coco.py @@ -218,6 +218,9 @@ def main(args=None): log.debug("Class id is: %s", geojson["class_id"]) log.debug("Trim class is: %s", trim_class) categories_json = make_category_object(geojson, class_column, trim_class) + + # Make sure geojson class_column is string type + geojson[class_column] = geojson[class_column].astype(str) # If license is not supplied, use MIT by default if license is None:
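Note on crop_and_save_geojson (scripts/batch_geojson2coco.py): the new branch drops a duplicated "id" column before writing and skips empty crops. The sketch below shows the general shape of that per-raster cropping step using rasterio and geopandas from requirements.txt; the crop_to_raster name, the CRS comparison, and the reprojection of the raster extent are illustrative assumptions, not the script's exact behaviour (the script instead accepts a user-assumed raster CRS argument).

    # Illustrative sketch only: crop a GeoDataFrame to one raster's extent and
    # drop a duplicated "id" column so GeoJSON writing does not fail.
    import geopandas as gpd
    import rasterio
    from shapely.geometry import box

    def crop_to_raster(geojson: gpd.GeoDataFrame, raster_file: str) -> gpd.GeoDataFrame:
        with rasterio.open(raster_file) as src:
            # raster bounds are (left, bottom, right, top), i.e. a shapely box
            extent = gpd.GeoSeries([box(*src.bounds)], crs=src.crs)
        extent = extent.to_crs(geojson.crs)  # assumption: compare in the vector CRS
        cropped = geojson[geojson.geometry.intersects(extent.iloc[0])]
        if "id" in cropped.columns and cropped["id"].duplicated().any():
            cropped = cropped.drop(columns=["id"])
        return cropped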
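Note on the COCO concatenation block in batch_geojson2coco.py main(): it re-bases image, annotation, and category ids so that the per-pair COCO files can be merged, deduplicating categories by name rather than by their original ids. A minimal, self-contained sketch of that bookkeeping follows; merge_coco and its plain-dict inputs are illustrative, not the script's API, and licenses/info handling is omitted for brevity.

    # Minimal sketch of the id re-basing performed when concatenating per-pair
    # COCO files. Each element of `datasets` is assumed to be a loaded COCO dict.
    def merge_coco(datasets):
        merged = {"images": [], "annotations": [], "categories": []}
        next_image_id = next_annot_id = next_cat_id = 0
        cat_id_by_name = {}
        for ds in datasets:
            image_id_map, cat_id_map = {}, {}
            for img in ds["images"]:
                image_id_map[img["id"]] = next_image_id
                img["id"] = next_image_id
                next_image_id += 1
                merged["images"].append(img)
            for cat in ds["categories"]:
                # categories are deduplicated by name, not by their original id
                if cat["name"] not in cat_id_by_name:
                    cat_id_by_name[cat["name"]] = next_cat_id
                    merged["categories"].append({"id": next_cat_id, "name": cat["name"]})
                    next_cat_id += 1
                cat_id_map[cat["id"]] = cat_id_by_name[cat["name"]]
            for ann in ds["annotations"]:
                ann["image_id"] = image_id_map[ann["image_id"]]
                ann["category_id"] = cat_id_map[ann["category_id"]]
                ann["id"] = next_annot_id
                next_annot_id += 1
                merged["annotations"].append(ann)
        return merged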
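Note on multipolygon_to_polygons in scripts/coco2geojson.py: the inner loop iterates a MultiPolygon directly (for polygon in geometry), which Shapely 1.8 deprecates and Shapely 2.x rejects with a TypeError; requirements.txt does not pin shapely, so a current install will hit the 2.x behaviour. A sketch of the same conversion under that assumption, using .geoms (the helper name is illustrative):

    # Sketch only: MultiPolygon-to-Polygon conversion compatible with Shapely 2.x,
    # where a MultiPolygon is no longer directly iterable.
    import geopandas as gpd
    from shapely.geometry import MultiPolygon

    def explode_multipolygons(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        new_geometries = []
        for geometry in gdf.geometry:
            if isinstance(geometry, MultiPolygon):
                # .geoms is required on Shapely 2.x; plain iteration raises TypeError
                new_geometries.extend(geometry.geoms)
            else:
                new_geometries.append(geometry)
        return gpd.GeoDataFrame(geometry=new_geometries, crs=gdf.crs)

    # GeoPandas can also do this in one call while keeping the other columns:
    # gdf = gdf.explode(index_parts=False).reset_index(drop=True)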
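Note on the pre-simplification saves in coco2geojson.py main(): when --geojson-output or --geoparquet-output is supplied it is parsed with type=Path, and pathlib.Path.replace() renames a file on disk rather than substituting text, so geojson_path.replace(".gejson", ...) and geopardquet_path.replace(".geoparquet", ...) will raise and fall into the except branches (the ".gejson" search string also would not match a ".geojson" name even on a string path). A hedged sketch of one way to derive the sibling filenames; presimplification_name is an illustrative helper, not part of the patch:

    # Sketch only: build "-presimplification" output names from Path objects.
    from pathlib import Path

    def presimplification_name(path: Path) -> Path:
        # e.g. out/buildings.geojson -> out/buildings-presimplification.geojson
        return path.with_name(f"{path.stem}-presimplification{path.suffix}")

    # hypothetical usage, guarded the same way as the patch's try/except blocks:
    # if geojson_path is not None:
    #     polygons_df.to_file(presimplification_name(Path(geojson_path)), driver="GeoJSON")
    # if geoparquet_path is not None:
    #     polygons_df.to_parquet(presimplification_name(Path(geoparquet_path)))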