diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..81322db --- /dev/null +++ b/requirements.txt @@ -0,0 +1,36 @@ +tqdm +pandas +pillow +matplotlib +shapely +geopandas +rioxarray +rasterio +geoplot +pyarrow +fiona +pre-commit +Sphinx +sphinx_rtd_theme +pycocotools +funcy +argparse +scikit-learn +scikit-multilearn +netron +onnx +onnxruntime +opencv-python +roboflow +torch +torchvision +wandb +geojson +groundingdino-py +mercantile +numpy +osmtogeojson +overpass +overpy +requests +segment-geospatial diff --git a/scripts/batch_geojson2coco.py b/scripts/batch_geojson2coco.py index 944a50a..6e2f104 100644 --- a/scripts/batch_geojson2coco.py +++ b/scripts/batch_geojson2coco.py @@ -15,8 +15,6 @@ from pycocotools.coco import COCO from shapely.geometry import box from tqdm import tqdm -from pathlib import Path - from aigis.convert.coordinates import wkt_parser @@ -38,6 +36,7 @@ def format_string(s, length=23): format_specifier = "{:<" + str(length) + "." + str(length) + "}" return format_specifier.format("NA") + def resume(output_dir: str) -> list: """Resume a batch job from an output directory. @@ -64,6 +63,7 @@ def resume(output_dir: str) -> list: return processed + def crop_and_save_geojson( raster_dir: str, geojson_path: str, @@ -117,6 +117,11 @@ def crop_and_save_geojson( # Crop the GeoJSON to the extent of the raster cropped_geojson = geojson[geojson.geometry.intersects(bbox)] + # Drop id feild from the geojson properties if there are duplicates + if 'id' in cropped_geojson.columns and cropped_geojson['id'].duplicated().any(): + cropped_geojson = cropped_geojson.drop(columns=['id']) + + # Save the cropped GeoJSON with the same naming pattern cropped_geojson_filename = os.path.join( cropped_dir, os.path.basename(raster_file).split(".")[0] + ".geojson" @@ -124,10 +129,12 @@ def crop_and_save_geojson( if os.path.exists(cropped_geojson_filename) and not force_overwrite: continue else: - cropped_geojson.to_file(cropped_geojson_filename, driver="GeoJSON") + if not cropped_geojson.empty: + cropped_geojson.to_file(cropped_geojson_filename, driver="GeoJSON") return cropped_dir + def process_single(args): """The main script with a sinle threaded implementation.""" output_dir = args.output_dir @@ -231,211 +238,6 @@ def process_single(args): return individual_coco_datasets -def process_vector_dir(args): - """ - Check and process the vector directory. - - Args: - - args: Command-line arguments. - - This function checks if the provided vector directory exists. If the directory is not found and the provided file is a GeoJSON file, it crops the GeoJSON file to the extent of the raster file specified. - - Returns: - - args: Updated command-line arguments. - """ - - # Check the vector-dir, and if it is not a dir, and is a single geojson file, then crop it to the extent of the raster file - if not os.path.isdir(args.vector_dir): - if args.vector_dir.endswith(".geojson"): - logger.info( - "The vector-dir is not a directory, and is a geojson file. Cropping it to the extent of the raster file." - ) - args.vector_dir = crop_and_save_geojson( - args.raster_dir, - args.vector_dir, - raster_extension=".tif", - user_crs=args.user_assumed_raster_crs, - force_overwrite=args.force_overwrite, - ) - else: - raise ValueError( - "The vector-dir is not a directory, and is not a geojson file. Please provide a directory or a geojson file." 
- ) - - return args - -def print_individual_coco_datasets(individual_coco_datasets): - """Print markdown output for individual COCO datasets.""" - print("Running geojson2coco.py over raster and vector pairs:") - print() - print( - "| Raster File | Vector File | JSON File |" - ) - print( - "| ----------------------- | ----------------------- | ----------------------- |" - ) - for coco_file in individual_coco_datasets: - pair_dir = os.path.dirname(coco_file) - raster_file = os.path.basename(pair_dir) + ".tif" - vector_file = os.path.basename(pair_dir) + ".geojson" - print( - f"| {format_string(raster_file,23)} | {format_string(vector_file,23)} | {format_string(coco_file,23)} |" - ) - -def concatenate_datasets(individual_coco_datasets: list[Path], args) -> None: - concatenated_coco = COCO() # Create a new COCO dataset - concatenated_coco.dataset = { - "images": [], - "annotations": [], - "categories": [], - "licenses": [], - "info": {}, - } - - # Fix the category ids in annotations and categories blocks - category_index_checkpoint = 0 - image_index_checkpoint = 0 - annot_index_checkpoint = 0 - for coco_file in tqdm(individual_coco_datasets): - image_index_map = {} - category_index_map = {} - - try: - with open(coco_file, "r") as f: - dataset = json.load(f) - except FileNotFoundError: - print(f"Error: {coco_file} not found.") - continue - - pair_dir = os.path.dirname(coco_file) - raster_name = os.path.basename(pair_dir) - - for image_no, _ in enumerate(dataset["images"]): - dataset["images"][image_no]["file_name"] = os.path.join( - raster_name, dataset["images"][image_no]["file_name"] - ) - - image_index_map[ - dataset["images"][image_no]["id"] - ] = image_index_checkpoint - - dataset["images"][image_no]["id"] = image_index_checkpoint - image_index_checkpoint += 1 - - for _, dataset_category in enumerate(dataset["categories"]): - old_id = dataset_category["id"] - - if dataset_category["name"] not in [ - category["name"] - for category in concatenated_coco.dataset["categories"] - ]: - dataset_category["id"] = category_index_checkpoint - concatenated_coco.dataset["categories"].append(dataset_category) - category_index_map[old_id] = category_index_checkpoint - category_index_checkpoint += 1 - - else: - # find the existing mapping id - existing_mapping_id = None - for category in concatenated_coco.dataset["categories"]: - if category["name"] == dataset_category["name"]: - existing_mapping_id = category["id"] - break - dataset_category["id"] = existing_mapping_id - category_index_map[old_id] = existing_mapping_id - - for annotation_no, _ in enumerate(dataset["annotations"]): - annotation_image_id = dataset["annotations"][annotation_no]["image_id"] - dataset["annotations"][annotation_no]["image_id"] = image_index_map[ - annotation_image_id - ] - dataset["annotations"][annotation_no]["id"] = annot_index_checkpoint - - # make the segnmets list of lists if not already - if not isinstance( - dataset["annotations"][annotation_no]["segmentation"][0], list - ): - dataset["annotations"][annotation_no]["segmentation"] = [ - dataset["annotations"][annotation_no]["segmentation"] - ] - - # fix the annotation category id by the category_index_map - dataset["annotations"][annotation_no][ - "category_id" - ] = category_index_map[ - dataset["annotations"][annotation_no]["category_id"] - ] - - annot_index_checkpoint += 1 - - # Add the dataset to the concatenated COCO dataset - concatenated_coco.dataset["images"].extend(dataset["images"]) - concatenated_coco.dataset["annotations"].extend(dataset["annotations"]) - - # 
Add the categories to the concatenated COCO dataset if dataset["categories"]["id"] are not already in the concatenated_coco.dataset["categories"]["id"] - for category in dataset["categories"]: - if category["id"] not in [ - category["id"] - for category in concatenated_coco.dataset["categories"] - ]: - concatenated_coco.dataset["categories"].append(category) - try: - concatenated_coco.dataset["licenses"].extend(dataset["licenses"]) - except KeyError: - pass - - try: - concatenated_coco.dataset["info"] = dataset["info"] - except KeyError: - pass - - try: - concatenated_coco.dataset["type"] = dataset["type"] - except KeyError: - pass - - # Specify the output directory for the concatenated dataset - concatenated_output_dir = os.path.join(args.output_dir, "concatenated") - os.makedirs(concatenated_output_dir, exist_ok=True) - - # Save the concatenated COCO dataset - concatenated_json_file = os.path.join( - concatenated_output_dir, "concatenated_coco.json" - ) - with open(concatenated_json_file, "w") as f: - json.dump(concatenated_coco.dataset, f, indent=2) - - print(f"\nConcatenated COCO dataset saved to: {concatenated_json_file}") - - # Add roboflow compatible JSON and png files in a single directory named Roboflow - if args.roboflow_compatible: - roboflow_output_dir = os.path.join(args.output_dir, "Roboflow") - os.makedirs(roboflow_output_dir, exist_ok=True) - - # Save the concatenated COCO dataset as roboflow compatible JSON - roboflow_json_file = os.path.join(roboflow_output_dir, "concatenated.json") - with open(roboflow_json_file, "w") as f: - json.dump(concatenated_coco.dataset, f, indent=2) - - print(f"Roboflow compatible JSON saved to: {roboflow_json_file}") - - # Open the json file as a text file, then replace all image paths with the updated paths (/tile_ -> _tile_), then save it - with open(roboflow_json_file, "r") as f: - roboflow_json = f.read() - roboflow_json = roboflow_json.replace("/tile_", "_tile_") - with open(roboflow_json_file, "w") as f: - f.write(roboflow_json) - - # Copy all png files in the subdirectories to the roboflow_output_dir - in_pattern = roboflow_json_file.replace("concatenated.json", "/**/*.png") - files = glob.glob(in_pattern) - - # Copy files to the out_dir and rename the file to the name of the directory it was in+the file name - for file in tqdm(files): - # print(file) - os.system( - f"cp {file} {os.path.join(roboflow_output_dir,os.path.basename(os.path.dirname(file)))}_{os.path.basename(file)}" - ) def parse_arguments(args): parser = argparse.ArgumentParser( @@ -507,6 +309,7 @@ def parse_arguments(args): return parser.parse_args(args) + def main(args=None): """Convert raster and vector pairs to COCO JSON format. @@ -522,23 +325,204 @@ def main(args=None): """ args = parse_arguments(args) - + # Check the vector-dir, and if it is not a dir, and is a single geojson file, then crop it to the extent of the raster file - args = process_vector_dir(args) + if not os.path.isdir(args.vector_dir): + if args.vector_dir.endswith(".geojson"): + logger.info( + "The vector-dir is not a directory, and is a geojson file. Cropping it to the extent of the raster file." + ) + args.vector_dir = crop_and_save_geojson( + args.raster_dir, + args.vector_dir, + raster_extension=".tif", + user_crs=args.user_assumed_raster_crs, + force_overwrite=args.force_overwrite, + ) + else: + raise ValueError( + "The vector-dir is not a directory, and is not a geojson file. Please provide a directory or a geojson file." 
+ ) - # Specify the output directory + # Specify the output directory if args.no_workers > 1: raise NotImplementedError("Parallel processing not implemented yet.") else: print("Running geojson2coco.py over raster and vector pairs:") individual_coco_datasets = process_single(args) - # # Generate markdown output for individual COCO datasets - print_individual_coco_datasets(individual_coco_datasets) + # Generate markdown output for individual COCO datasets + print("Running geojson2coco.py over raster and vector pairs:") + print() + print( + "| Raster File | Vector File | JSON File |" + ) + print( + "| ----------------------- | ----------------------- | ----------------------- |" + ) + for coco_file in individual_coco_datasets: + pair_dir = os.path.dirname(coco_file) + raster_file = os.path.basename(pair_dir) + ".tif" + vector_file = os.path.basename(pair_dir) + ".geojson" + print( + f"| {format_string(raster_file,23)} | {format_string(vector_file,23)} | {format_string(coco_file,23)} |" + ) + # Concatenate COCO datasets if the --concatenate argument is enabled if args.concatenate: - # Concatenate COCO datasets if the --concatenate argument is enabled - concatenate_datasets(individual_coco_datasets, args) + concatenated_coco = COCO() # Create a new COCO dataset + concatenated_coco.dataset = { + "images": [], + "annotations": [], + "categories": [], + "licenses": [], + "info": {}, + } + + # Fix the category ids in annotations and categories blocks + category_index_checkpoint = 0 + image_index_checkpoint = 0 + annot_index_checkpoint = 0 + for coco_file in tqdm(individual_coco_datasets): + image_index_map = {} + category_index_map = {} + + try: + with open(coco_file, "r") as f: + dataset = json.load(f) + except FileNotFoundError: + print(f"Error: {coco_file} not found.") + continue + + pair_dir = os.path.dirname(coco_file) + raster_name = os.path.basename(pair_dir) + + for image_no, _ in enumerate(dataset["images"]): + dataset["images"][image_no]["file_name"] = os.path.join( + raster_name, dataset["images"][image_no]["file_name"] + ) + + image_index_map[ + dataset["images"][image_no]["id"] + ] = image_index_checkpoint + + dataset["images"][image_no]["id"] = image_index_checkpoint + image_index_checkpoint += 1 + + for _, dataset_category in enumerate(dataset["categories"]): + old_id = dataset_category["id"] + + if dataset_category["name"] not in [ + category["name"] + for category in concatenated_coco.dataset["categories"] + ]: + dataset_category["id"] = category_index_checkpoint + concatenated_coco.dataset["categories"].append(dataset_category) + category_index_map[old_id] = category_index_checkpoint + category_index_checkpoint += 1 + + else: + # find the existing mapping id + existing_mapping_id = None + for category in concatenated_coco.dataset["categories"]: + if category["name"] == dataset_category["name"]: + existing_mapping_id = category["id"] + break + dataset_category["id"] = existing_mapping_id + category_index_map[old_id] = existing_mapping_id + + for annotation_no, _ in enumerate(dataset["annotations"]): + annotation_image_id = dataset["annotations"][annotation_no]["image_id"] + dataset["annotations"][annotation_no]["image_id"] = image_index_map[ + annotation_image_id + ] + dataset["annotations"][annotation_no]["id"] = annot_index_checkpoint + + # make the segnmets list of lists if not already + if not isinstance( + dataset["annotations"][annotation_no]["segmentation"][0], list + ): + dataset["annotations"][annotation_no]["segmentation"] = [ + 
dataset["annotations"][annotation_no]["segmentation"] + ] + + # fix the annotation category id by the category_index_map + dataset["annotations"][annotation_no][ + "category_id" + ] = category_index_map[ + dataset["annotations"][annotation_no]["category_id"] + ] + + annot_index_checkpoint += 1 + + # Add the dataset to the concatenated COCO dataset + concatenated_coco.dataset["images"].extend(dataset["images"]) + concatenated_coco.dataset["annotations"].extend(dataset["annotations"]) + + # Add the categories to the concatenated COCO dataset if dataset["categories"]["id"] are not already in the concatenated_coco.dataset["categories"]["id"] + for category in dataset["categories"]: + if category["id"] not in [ + category["id"] + for category in concatenated_coco.dataset["categories"] + ]: + concatenated_coco.dataset["categories"].append(category) + try: + concatenated_coco.dataset["licenses"].extend(dataset["licenses"]) + except KeyError: + pass + + try: + concatenated_coco.dataset["info"] = dataset["info"] + except KeyError: + pass + + try: + concatenated_coco.dataset["type"] = dataset["type"] + except KeyError: + pass + + # Specify the output directory for the concatenated dataset + concatenated_output_dir = os.path.join(args.output_dir, "concatenated") + os.makedirs(concatenated_output_dir, exist_ok=True) + + # Save the concatenated COCO dataset + concatenated_json_file = os.path.join( + concatenated_output_dir, "concatenated_coco.json" + ) + with open(concatenated_json_file, "w") as f: + json.dump(concatenated_coco.dataset, f, indent=2) + + print(f"\nConcatenated COCO dataset saved to: {concatenated_json_file}") + + # Add roboflow compatible JSON and png files in a single directory named Roboflow + if args.roboflow_compatible: + roboflow_output_dir = os.path.join(args.output_dir, "Roboflow") + os.makedirs(roboflow_output_dir, exist_ok=True) + + # Save the concatenated COCO dataset as roboflow compatible JSON + roboflow_json_file = os.path.join(roboflow_output_dir, "concatenated.json") + with open(roboflow_json_file, "w") as f: + json.dump(concatenated_coco.dataset, f, indent=2) + + print(f"Roboflow compatible JSON saved to: {roboflow_json_file}") + + # Open the json file as a text file, then replace all image paths with the updated paths (/tile_ -> _tile_), then save it + with open(roboflow_json_file, "r") as f: + roboflow_json = f.read() + roboflow_json = roboflow_json.replace("/tile_", "_tile_") + with open(roboflow_json_file, "w") as f: + f.write(roboflow_json) + + # Copy all png files in the subdirectories to the roboflow_output_dir + in_pattern = roboflow_json_file.replace("concatenated.json", "/**/*.png") + files = glob.glob(in_pattern) + + # Copy files to the out_dir and rename the file to the name of the directory it was in+the file name + for file in tqdm(files): + # print(file) + os.system( + f"cp {file} {os.path.join(roboflow_output_dir,os.path.basename(os.path.dirname(file)))}_{os.path.basename(file)}" + ) if __name__ == "__main__": diff --git a/scripts/coco2geojson.py b/scripts/coco2geojson.py index 194857f..419e36c 100644 --- a/scripts/coco2geojson.py +++ b/scripts/coco2geojson.py @@ -14,7 +14,7 @@ import geopandas as gpd import pandas as pd -from shapely.geometry import Polygon +from shapely.geometry import MultiPolygon, Polygon from shapely.ops import unary_union from tqdm import tqdm @@ -38,6 +38,53 @@ tqdm.pandas() +def convert_multipolygon_to_polygons(geometry): + """Convert a MultiPolygon to a list of Polygons. 
+ + Args: + geometry (shapely.geometry.MultiPolygon): MultiPolygon to convert + + Returns: + list: List of Polygons + """ + + if isinstance(geometry, MultiPolygon): + return list(geometry.geoms) + else: + return [geometry] + + +def multipolygon_to_polygons(gdf): + """Convert a GeoDataFrame with MultiPolygons to a GeoDataFrame with + Polygons. + + Args: + gdf (GeoDataFrame): GeoDataFrame with MultiPolygons + + Returns: + GeoDataFrame: GeoDataFrame with Polygons + """ + # Create an empty list to hold the converted geometries + new_geometries = [] + + # Iterate over each row in the GeoDataFrame + for geometry in tqdm(gdf.geometry, total=gdf.shape[0]): + # If the geometry is a MultiPolygon + if isinstance(geometry, MultiPolygon): + # Convert each part of the MultiPolygon into a separate Polygon + for polygon in geometry: + new_geometries.append(polygon) + else: + # If it's not a MultiPolygon, just append the original geometry + new_geometries.append(geometry) + + # Create a new GeoDataFrame with the converted geometries + new_gdf = gpd.GeoDataFrame(geometry=new_geometries) + new_gdf.crs = gdf.crs + + return new_gdf + + def merge_class_polygons_geopandas(tiles_df_zone_groups, crs, keep_geom_type): """Merge overlapping polygons in each class/zone. @@ -114,12 +161,11 @@ def merge_class_polygons_shapely(tiles_df_zone_groups, crs): polygons_df (GeoDataFrame): GeoDataFrame of merged polygons """ print( - "Using the unary_union method to merge overlapping polygons in each class/zone." + "Using the unary_union method to merge overlapping polygons in each class/zone.\n" ) # polygons_df_zone_groups = [] - for index, tiles_df_zone in tqdm( - enumerate(tiles_df_zone_groups), total=len(tiles_df_zone_groups) - ): + for index, tiles_df_zone in enumerate(tiles_df_zone_groups): + print(f"Processing zone {index+1} of {len(tiles_df_zone_groups)}") tiles_df_zone = tiles_df_zone.reset_index(drop=True) # Convert segmentations to polygons @@ -130,10 +176,23 @@ def merge_class_polygons_shapely(tiles_df_zone_groups, crs): axis=1, ) + # Validate geometries + valid_geometries = [] + for geom in tqdm(tiles_df_zone["geometry"], total=tiles_df_zone.shape[0]): + # Check if geometry is valid + if not geom.is_valid: + print(f"Invalid geometry found at index {index}: {geom}") + # Attempt to fix the invalid geometry + geom = geom.buffer(0) + if not geom.is_valid: + print(f"Unable to fix geometry at index {index}, skipping") + continue + valid_geometries.append(geom) + # Merge overlapping polygons in each class/zone zone_name = tiles_df_zone["zone_name"][0] zone_code = tiles_df_zone["zone_code"][0] - multipolygon = unary_union(tiles_df_zone["geometry"]) + multipolygon = unary_union(valid_geometries) # polygons = list(multipolygon.geoms) if index == 0: @@ -171,13 +230,16 @@ def shape_regulariser( shapely.geometry.Polygon: Regularised polygon """ polygon_point_tuples = list(polygon.exterior.coords) - polygon = polygon_prep( - polygon_point_tuples, - simplify_tolerance, - minimum_rotated_rectangle, - orthogonalisation, - ) - polygon = Polygon(polygon) + try: + polygon = polygon_prep( + polygon_point_tuples, + simplify_tolerance, + minimum_rotated_rectangle, + orthogonalisation, + ) + polygon = Polygon(polygon) + except Exception as e: + log.error(f"Could not regularise the polygon. 
Error message: {e}") return polygon @@ -187,17 +249,16 @@ def shape_regulariser( def main(args=None): """Command-line driver.""" - test_data_path = "/home/sahand/Data/GIS2COCO/chatswood/big_tiles_200_b/" + # test_data_path = "/home/sahand/Data/GIS2COCO/chatswood/big_tiles_200_b/" ap = argparse.ArgumentParser(description=__doc__) ap.add_argument( "tiledir", type=Path, - default=test_data_path, help="Path to the input tiles directory with rasters. PNG files are not required.", ) ap.add_argument( "cocojson", - default=os.path.join(test_data_path, "coco-out-tol_0.4-b.json"), + # default=os.path.join(test_data_path, "coco-out-tol_0.4-b.json"), type=Path, help="Path to the input coco json file.", ) @@ -210,14 +271,17 @@ def main(args=None): ap.add_argument( "--geojson-output", "-o", - # required=True, - default=os.path.join( - test_data_path, - f"coco_2_geojson_{datetime.today().strftime('%Y-%m-%d')}.geojson", - ), + default=None, type=Path, help="Path to output geojson file.", ) + ap.add_argument( + "--geoparquet-output", + "-p", + default=None, + type=Path, + help="Path to output geoparquet file.", + ) ap.add_argument( "--tile-search-margin", default=0, @@ -280,6 +344,7 @@ def main(args=None): Read tiles and COCO JSON, and convert to GeoJSON. """ geojson_path = args.geojson_output + geopardquet_path = args.geoparquet_output tile_dir = args.tiledir meta_name = args.meta_name coco_json_path = args.cocojson @@ -288,6 +353,23 @@ def main(args=None): minimum_rotated_rectangle = args.minimum_rotated_rectangle orthogonalisation = args.orthogonalisation + if geojson_path is None and geopardquet_path is None: + geojson_path = os.path.join( + f"coco_2_geojson_{datetime.today().strftime('%Y-%m-%d')}.geojson", + ) + + print("Arguments:") + print(f"> Reading tiles from {tile_dir}") + print(f"> Reading COCO JSON from {coco_json_path}") + print(f"> Simplify tolerance (float): {float(simplify_tolerance)}") + print(f"> Simplify tolerance: {simplify_tolerance}") + print(f"> Minimum rotated rectangle: {minimum_rotated_rectangle}") + print(f"> Orthogonalisation: {orthogonalisation}") + print(f"> Writing Geoparquet to: {geopardquet_path}") + print(f"> Writing GeoJSON to {geojson_path}") + + simplify_tolerance = float(simplify_tolerance) + # keep_geom_type = ( # not args.not_keep_geom_type # ) # should be True # only meaningful when using geopandas overlay method @@ -338,10 +420,47 @@ def main(args=None): polygons_df = merge_class_polygons_shapely(tiles_df_zone_groups, crs) # polygons_df = merge_class_polygons_geopandas(tiles_df_zone_groups,crs,keep_geom_type) # geopandas overlay method -- slow - # change crs of the gpd and its geometries to "epsg:4326" + # Save to geojson before orthogonalisation + + if ( + simplify_tolerance > 0 + or minimum_rotated_rectangle is True + or orthogonalisation is True + ): + try: + polygons_df.to_file( + geojson_path.replace(".gejson", "-presimplification.geojson"), + driver="GeoJSON", + ) + except Exception as e: + log.error(f"Could not save the raw file to geojson. Error message: {e}") + + if ( + simplify_tolerance > 0 + or minimum_rotated_rectangle is True + or orthogonalisation is True + ): + try: + polygons_df.to_parquet( + geopardquet_path.replace(".geoparquet", "-presimplification.geoparquet") + ) + except Exception as e: + log.error(f"Could not save the raw file to geoparquet. 
Error message: {e}") + + # change crs of the gpd and its geometries to "epsg:4326" temporarily original_crs = polygons_df.crs polygons_df = polygons_df.to_crs("epsg:4326") + # Change multipolygons to polygons + print("Converting MultiPolygons to Polygons.") + # polygons_df["geometry"] = ( + # polygons_df["geometry"] + # .apply(lambda geom: convert_multipolygon_to_polygons(geom)) + # .explode() + # .reset_index(drop=True) + # ) + polygons_df = multipolygon_to_polygons(polygons_df) + print("Regularising the shape of the polygons.") # print(polygons_df) polygons_df["geometry"] = polygons_df["geometry"].progress_apply( @@ -359,8 +478,12 @@ def main(args=None): except Exception as e: log.error(f"Could not set Name property of geojson. Error message: {e}") print("FIX this code!") + # Save to geojson - polygons_df.to_file(geojson_path, driver="GeoJSON") + if geojson_path is not None: + polygons_df.to_file(geojson_path, driver="GeoJSON") + if geopardquet_path is not None: + polygons_df.to_parquet(geopardquet_path) if __name__ == "__main__": diff --git a/scripts/geojson2coco.py b/scripts/geojson2coco.py index 5a77084..4cfce63 100644 --- a/scripts/geojson2coco.py +++ b/scripts/geojson2coco.py @@ -218,6 +218,9 @@ def main(args=None): log.debug("Class id is: %s", geojson["class_id"]) log.debug("Trim class is: %s", trim_class) categories_json = make_category_object(geojson, class_column, trim_class) + + # Make sure geojson class_column is string type + geojson[class_column] = geojson[class_column].astype(str) # If license is not supplied, use MIT by default if license is None:
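Note on crop_and_save_geojson (scripts/batch_geojson2coco.py): the new branch drops a duplicated "id" column before writing and skips empty crops. The sketch below shows the general shape of that per-raster cropping step using rasterio and geopandas from requirements.txt; the crop_to_raster name, the CRS comparison, and the reprojection of the raster extent are illustrative assumptions, not the script's exact behaviour (the script instead accepts a user-assumed raster CRS argument).

    # Illustrative sketch only: crop a GeoDataFrame to one raster's extent and
    # drop a duplicated "id" column so GeoJSON writing does not fail.
    import geopandas as gpd
    import rasterio
    from shapely.geometry import box

    def crop_to_raster(geojson: gpd.GeoDataFrame, raster_file: str) -> gpd.GeoDataFrame:
        with rasterio.open(raster_file) as src:
            # raster bounds are (left, bottom, right, top), i.e. a shapely box
            extent = gpd.GeoSeries([box(*src.bounds)], crs=src.crs)
        extent = extent.to_crs(geojson.crs)  # assumption: compare in the vector CRS
        cropped = geojson[geojson.geometry.intersects(extent.iloc[0])]
        if "id" in cropped.columns and cropped["id"].duplicated().any():
            cropped = cropped.drop(columns=["id"])
        return cropped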
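Note on the COCO concatenation block in batch_geojson2coco.py main(): it re-bases image, annotation, and category ids so that the per-pair COCO files can be merged, deduplicating categories by name rather than by their original ids. A minimal, self-contained sketch of that bookkeeping follows; merge_coco and its plain-dict inputs are illustrative, not the script's API, and licenses/info handling is omitted for brevity.

    # Minimal sketch of the id re-basing performed when concatenating per-pair
    # COCO files. Each element of `datasets` is assumed to be a loaded COCO dict.
    def merge_coco(datasets):
        merged = {"images": [], "annotations": [], "categories": []}
        next_image_id = next_annot_id = next_cat_id = 0
        cat_id_by_name = {}
        for ds in datasets:
            image_id_map, cat_id_map = {}, {}
            for img in ds["images"]:
                image_id_map[img["id"]] = next_image_id
                img["id"] = next_image_id
                next_image_id += 1
                merged["images"].append(img)
            for cat in ds["categories"]:
                # categories are deduplicated by name, not by their original id
                if cat["name"] not in cat_id_by_name:
                    cat_id_by_name[cat["name"]] = next_cat_id
                    merged["categories"].append({"id": next_cat_id, "name": cat["name"]})
                    next_cat_id += 1
                cat_id_map[cat["id"]] = cat_id_by_name[cat["name"]]
            for ann in ds["annotations"]:
                ann["image_id"] = image_id_map[ann["image_id"]]
                ann["category_id"] = cat_id_map[ann["category_id"]]
                ann["id"] = next_annot_id
                next_annot_id += 1
                merged["annotations"].append(ann)
        return merged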
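Note on multipolygon_to_polygons in scripts/coco2geojson.py: the inner loop iterates a MultiPolygon directly (for polygon in geometry), which Shapely 1.8 deprecates and Shapely 2.x rejects with a TypeError; requirements.txt does not pin shapely, so a current install will hit the 2.x behaviour. A sketch of the same conversion under that assumption, using .geoms (the helper name is illustrative):

    # Sketch only: MultiPolygon-to-Polygon conversion compatible with Shapely 2.x,
    # where a MultiPolygon is no longer directly iterable.
    import geopandas as gpd
    from shapely.geometry import MultiPolygon

    def explode_multipolygons(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        new_geometries = []
        for geometry in gdf.geometry:
            if isinstance(geometry, MultiPolygon):
                # .geoms is required on Shapely 2.x; plain iteration raises TypeError
                new_geometries.extend(geometry.geoms)
            else:
                new_geometries.append(geometry)
        return gpd.GeoDataFrame(geometry=new_geometries, crs=gdf.crs)

    # GeoPandas can also do this in one call while keeping the other columns:
    # gdf = gdf.explode(index_parts=False).reset_index(drop=True)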
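Note on the pre-simplification saves in coco2geojson.py main(): when --geojson-output or --geoparquet-output is supplied it is parsed with type=Path, and pathlib.Path.replace() renames a file on disk rather than substituting text, so geojson_path.replace(".gejson", ...) and geopardquet_path.replace(".geoparquet", ...) will raise and fall into the except branches (the ".gejson" search string also would not match a ".geojson" name even on a string path). A hedged sketch of one way to derive the sibling filenames; presimplification_name is an illustrative helper, not part of the patch:

    # Sketch only: build "-presimplification" output names from Path objects.
    from pathlib import Path

    def presimplification_name(path: Path) -> Path:
        # e.g. out/buildings.geojson -> out/buildings-presimplification.geojson
        return path.with_name(f"{path.stem}-presimplification{path.suffix}")

    # hypothetical usage, guarded the same way as the patch's try/except blocks:
    # if geojson_path is not None:
    #     polygons_df.to_file(presimplification_name(Path(geojson_path)), driver="GeoJSON")
    # if geoparquet_path is not None:
    #     polygons_df.to_parquet(presimplification_name(Path(geoparquet_path)))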