
Commit

Delete cache tiles using bulk size - 1000
Rub21 committed Feb 6, 2025
1 parent fca2f22 commit 8a169af
Showing 1 changed file with 50 additions and 23 deletions.
73 changes: 50 additions & 23 deletions images/tiler-cache/s3_cleanup.py
@@ -2,6 +2,7 @@
import re
import logging


def compute_children_tiles(s3_path, zoom_levels):
"""
Compute child tiles for the specified zoom levels from a parent tile file in S3.
@@ -13,21 +14,23 @@ def compute_children_tiles(s3_path, zoom_levels):
Returns:
list: A sorted list of unique child tile paths in "zoom/x/y" format only for the target zoom levels.
"""
logging.info(f"Starting computation of child tiles for {s3_path} and zoom levels {sorted(set(zoom_levels))}.")

logging.info(
f"Starting computation of child tiles for {s3_path} and zoom levels {sorted(set(zoom_levels))}."
)

s3_client = boto3.client("s3")
s3_match = re.match(r"s3://([^/]+)/(.+)", s3_path)
if not s3_match:
raise ValueError(f"Invalid S3 path: {s3_path}")

bucket_name, key = s3_match.groups()
child_tiles = set()

try:
logging.info(f"Fetching file from S3 bucket: {bucket_name}, key: {key}.")
response = s3_client.get_object(Bucket=bucket_name, Key=key)
file_content = response["Body"].read().decode("utf-8")

logging.info(f"Processing tiles in file.")
for line in file_content.splitlines():
tile = line.strip()
@@ -40,18 +43,21 @@ def compute_children_tiles(s3_path, zoom_levels):
y *= 2
z += 1
if z == target_zoom:
child_tiles.update([
f"{z}/{x}/{y}",
f"{z}/{x+1}/{y}",
f"{z}/{x}/{y+1}",
f"{z}/{x+1}/{y+1}"
])
child_tiles.update(
[
f"{z}/{x}/{y}",
f"{z}/{x+1}/{y}",
f"{z}/{x}/{y+1}",
f"{z}/{x+1}/{y+1}",
]
)

except Exception as e:
logging.error(f"Error processing S3 file: {e}")
raise

return sorted(child_tiles)
return sorted(child_tiles)
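
As a quick illustration of the doubling arithmetic above, the sketch below isolates it in a hypothetical helper (the parent-tile parsing and the surrounding loops over the file lines and zoom levels are elided in this hunk, and children_at_zoom is not part of s3_cleanup.py): each parent tile's x and y are doubled once per zoom step, and the 2x2 block at the scaled corner is recorded.

def children_at_zoom(z, x, y, target_zoom):
    # Double x and y once per zoom step until the target zoom is reached.
    while z < target_zoom:
        x *= 2
        y *= 2
        z += 1
    # The 2x2 block at the scaled corner, in "zoom/x/y" format.
    return [f"{z}/{x}/{y}", f"{z}/{x+1}/{y}", f"{z}/{x}/{y+1}", f"{z}/{x+1}/{y+1}"]

# Example: a zoom-5 parent tile 5/10/20 expanded to zoom 7.
print(children_at_zoom(5, 10, 20, 7))  # ['7/40/80', '7/41/80', '7/40/81', '7/41/81']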


def generate_tile_patterns(tiles):
"""
@@ -64,7 +70,7 @@ def generate_tile_patterns(tiles):
list: List of unique patterns in the format 'zoom/prefix'.
"""
patterns = set()

for tile in tiles:
match = re.match(r"(\d+)/(\d+)/(\d+)", tile)
if match:
@@ -77,14 +83,16 @@ def generate_tile_patterns(tiles):
return sorted(patterns)


def delete_folders_by_pattern(bucket_name, patterns, path_file):
def delete_folders_by_pattern(bucket_name, patterns, path_file, batch_size=1000):
"""
Delete folders in the S3 bucket matching the pattern:
s3://<bucket>/mnt/data/osm/<zoom>/<prefix>
s3://<bucket>/mnt/data/osm/<zoom>/<prefix>, using bulk delete.
Args:
bucket_name (str): The name of the S3 bucket.
patterns (list): A list of patterns in the format '<zoom>/<prefix>'.
patterns (list): A list of patterns in the format '<zoom>/<prefix>...'.
path_file (str): The base path in S3 where objects are stored.
batch_size (int): Number of objects to delete per request (default 1000).
Returns:
None
@@ -94,17 +102,36 @@ def delete_folders_by_pattern(bucket_name, patterns, path_file):
try:
for pattern in patterns:
zoom, prefix = pattern.split("/")
folder_prefix = f"{path_file}/{zoom}/{prefix}"
logging.info(f"Looking for objects under folder: {folder_prefix}...")
folder_prefix = f"{path_file}/{zoom}/{prefix}/"
logging.info(f"Fetching objects under folder: {folder_prefix}...")

paginator = s3_client.get_paginator("list_objects_v2")
response_iterator = paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix)
response_iterator = paginator.paginate(
Bucket=bucket_name, Prefix=folder_prefix
)

objects_to_delete = []
for page in response_iterator:
for obj in page.get("Contents", []):
key = obj["Key"]
logging.info(f"Deleting object: {key}")
s3_client.delete_object(Bucket=bucket_name, Key=key)
logging.info("Deletion completed for all matching patterns.")
objects_to_delete.append({"Key": obj["Key"]})

# Delete in batches of `batch_size`
if len(objects_to_delete) >= batch_size:
logging.info(f"Deleting {len(objects_to_delete)} objects...")
s3_client.delete_objects(
Bucket=bucket_name, Delete={"Objects": objects_to_delete}
)
objects_to_delete = []

# Delete remaining objects if any
if objects_to_delete:
logging.info(f"Deleting final {len(objects_to_delete)} objects...")
s3_client.delete_objects(
Bucket=bucket_name, Delete={"Objects": objects_to_delete}
)

logging.info("Bulk deletion completed for all matching patterns.")

except Exception as e:
logging.error(f"Error deleting folders: {e}")
logging.error(f"Error during bulk deletion: {e}")
raise
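
For context, S3's DeleteObjects API accepts at most 1,000 keys per request, which is why batch_size defaults to 1000 and the keys above are flushed in chunks. Below is a minimal usage sketch of how the three functions in this file might chain together; the bucket name, tile-list path, and zoom levels are made-up example values, and the script's real entry point is not shown in this diff.

# Hypothetical usage sketch; all literal values below are placeholders.
import logging

logging.basicConfig(level=logging.INFO)

tiles = compute_children_tiles("s3://example-bucket/tiles/expired_tiles.txt", [19, 20])
patterns = generate_tile_patterns(tiles)
delete_folders_by_pattern("example-tiler-cache", patterns, "mnt/data/osm")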
