From 8a169afec8e6da7c4865faa00dce12028a0360d4 Mon Sep 17 00:00:00 2001
From: Rub21
Date: Thu, 6 Feb 2025 13:57:58 -0500
Subject: [PATCH] Delete cache tiles using bulk size - 1000

---
 images/tiler-cache/s3_cleanup.py | 73 ++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/images/tiler-cache/s3_cleanup.py b/images/tiler-cache/s3_cleanup.py
index 01cfe4dc..fab7682f 100644
--- a/images/tiler-cache/s3_cleanup.py
+++ b/images/tiler-cache/s3_cleanup.py
@@ -2,6 +2,7 @@
 import re
 import logging
 
+
 def compute_children_tiles(s3_path, zoom_levels):
     """
     Compute child tiles for the specified zoom levels from a parent tile file in S3.
@@ -13,13 +14,15 @@ def compute_children_tiles(s3_path, zoom_levels):
     Returns:
         list: A sorted list of unique child tile paths in "zoom/x/y" format only for the target zoom levels.
     """
-    logging.info(f"Starting computation of child tiles for {s3_path} and zoom levels {sorted(set(zoom_levels))}.")
-    
+    logging.info(
+        f"Starting computation of child tiles for {s3_path} and zoom levels {sorted(set(zoom_levels))}."
+    )
+
     s3_client = boto3.client("s3")
     s3_match = re.match(r"s3://([^/]+)/(.+)", s3_path)
     if not s3_match:
         raise ValueError(f"Invalid S3 path: {s3_path}")
-    
+
     bucket_name, key = s3_match.groups()
     child_tiles = set()
 
@@ -27,7 +30,7 @@ def compute_children_tiles(s3_path, zoom_levels):
         logging.info(f"Fetching file from S3 bucket: {bucket_name}, key: {key}.")
         response = s3_client.get_object(Bucket=bucket_name, Key=key)
         file_content = response["Body"].read().decode("utf-8")
-    
+
         logging.info(f"Processing tiles in file.")
         for line in file_content.splitlines():
             tile = line.strip()
@@ -40,18 +43,21 @@ def compute_children_tiles(s3_path, zoom_levels):
                         y *= 2
                         z += 1
                     if z == target_zoom:
-                        child_tiles.update([
-                            f"{z}/{x}/{y}",
-                            f"{z}/{x+1}/{y}",
-                            f"{z}/{x}/{y+1}",
-                            f"{z}/{x+1}/{y+1}"
-                        ])
+                        child_tiles.update(
+                            [
+                                f"{z}/{x}/{y}",
+                                f"{z}/{x+1}/{y}",
+                                f"{z}/{x}/{y+1}",
+                                f"{z}/{x+1}/{y+1}",
+                            ]
+                        )
 
     except Exception as e:
         logging.error(f"Error processing S3 file: {e}")
         raise
 
-    return sorted(child_tiles)
+    return sorted(child_tiles)
+
 
 def generate_tile_patterns(tiles):
     """
@@ -64,7 +70,7 @@ def generate_tile_patterns(tiles):
         list: List of unique patterns in the format 'zoom/prefix'.
     """
     patterns = set()
-    
+
     for tile in tiles:
         match = re.match(r"(\d+)/(\d+)/(\d+)", tile)
         if match:
@@ -77,14 +83,16 @@ def generate_tile_patterns(tiles):
     return sorted(patterns)
 
 
-def delete_folders_by_pattern(bucket_name, patterns, path_file):
+def delete_folders_by_pattern(bucket_name, patterns, path_file, batch_size=1000):
     """
     Delete folders in the S3 bucket matching the pattern:
-    s3://<bucket_name>/mnt/data/osm/<zoom>/<prefix>
+    s3://<bucket_name>/mnt/data/osm/<zoom>/<prefix>, using bulk delete.
 
     Args:
         bucket_name (str): The name of the S3 bucket.
-        patterns (list): A list of patterns in the format '<zoom>/<prefix>'.
+        patterns (list): A list of patterns in the format '<zoom>/<prefix>...'.
+        path_file (str): The base path in S3 where objects are stored.
+        batch_size (int): Number of objects to delete per request (default 1000).
 
     Returns:
         None
@@ -94,17 +102,36 @@ def delete_folders_by_pattern(bucket_name, patterns, path_file):
     try:
         for pattern in patterns:
             zoom, prefix = pattern.split("/")
-            folder_prefix = f"{path_file}/{zoom}/{prefix}"
-            logging.info(f"Looking for objects under folder: {folder_prefix}...")
+            folder_prefix = f"{path_file}/{zoom}/{prefix}/"
+            logging.info(f"Fetching objects under folder: {folder_prefix}...")
+
             paginator = s3_client.get_paginator("list_objects_v2")
-            response_iterator = paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix)
+            response_iterator = paginator.paginate(
+                Bucket=bucket_name, Prefix=folder_prefix
+            )
+
+            objects_to_delete = []
             for page in response_iterator:
                 for obj in page.get("Contents", []):
-                    key = obj["Key"]
-                    logging.info(f"Deleting object: {key}")
-                    s3_client.delete_object(Bucket=bucket_name, Key=key)
-        logging.info("Deletion completed for all matching patterns.")
+                    objects_to_delete.append({"Key": obj["Key"]})
+
+                    # Delete in batches of `batch_size`
+                    if len(objects_to_delete) >= batch_size:
+                        logging.info(f"Deleting {len(objects_to_delete)} objects...")
+                        s3_client.delete_objects(
+                            Bucket=bucket_name, Delete={"Objects": objects_to_delete}
+                        )
+                        objects_to_delete = []
+
+            # Delete remaining objects if any
+            if objects_to_delete:
+                logging.info(f"Deleting final {len(objects_to_delete)} objects...")
+                s3_client.delete_objects(
+                    Bucket=bucket_name, Delete={"Objects": objects_to_delete}
+                )
+
+        logging.info("Bulk deletion completed for all matching patterns.")
 
     except Exception as e:
-        logging.error(f"Error deleting folders: {e}")
+        logging.error(f"Error during bulk deletion: {e}")
         raise
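
Note: S3's DeleteObjects API accepts at most 1,000 keys per request, which is why batch_size defaults to 1000 here. For reference, a minimal standalone sketch of the same batching pattern follows; the bucket name "tiler-cache-bucket" and prefix "mnt/data/osm/20/123/" are placeholder values for illustration only, not paths from this repository:

    import boto3

    def delete_prefix_in_batches(bucket, prefix, batch_size=1000):
        """Delete every object under `prefix` using bulk delete_objects calls."""
        s3 = boto3.client("s3")
        paginator = s3.get_paginator("list_objects_v2")
        batch = []
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get("Contents", []):
                batch.append({"Key": obj["Key"]})
                # Flush once we reach the per-request limit of 1000 keys.
                if len(batch) >= batch_size:
                    s3.delete_objects(Bucket=bucket, Delete={"Objects": batch})
                    batch = []
        # Flush whatever is left over.
        if batch:
            s3.delete_objects(Bucket=bucket, Delete={"Objects": batch})

    # Hypothetical usage:
    # delete_prefix_in_batches("tiler-cache-bucket", "mnt/data/osm/20/123/")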