From 65d1323931edddb6bdaa83085a8e420b25a60708 Mon Sep 17 00:00:00 2001
From: Radovan
Date: Fri, 14 Jun 2024 11:42:56 +0300
Subject: [PATCH] Make object store read timeout configurable (#763)

---
 medusa-example.ini                  |  3 +++
 medusa/config.py                    |  5 +++--
 medusa/storage/azure_storage.py     |  9 ++++++++-
 medusa/storage/google_storage.py    | 10 +++++++---
 medusa/storage/s3_base_storage.py   |  3 ++-
 tests/storage/azure_storage_test.py |  3 +++
 6 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/medusa-example.ini b/medusa-example.ini
index f57182f6..235e5634 100644
--- a/medusa-example.ini
+++ b/medusa-example.ini
@@ -118,6 +118,9 @@ use_sudo_for_restore = True
 
 ;aws_cli_path =
 
+; Read timeout in seconds for the storage provider.
+;read_timeout = 60
+
 [monitoring]
 
 ;monitoring_provider =
diff --git a/medusa/config.py b/medusa/config.py
index b7f55d30..3e133cf0 100644
--- a/medusa/config.py
+++ b/medusa/config.py
@@ -31,7 +31,7 @@
     ['bucket_name', 'key_file', 'prefix', 'fqdn', 'host_file_separator', 'storage_provider',
      'base_path', 'max_backup_age', 'max_backup_count', 'api_profile', 'transfer_max_bandwidth',
      'concurrent_transfers', 'multi_part_upload_threshold', 'host', 'region', 'port', 'secure', 'ssl_verify',
-     'aws_cli_path', 'kms_id', 'backup_grace_period_in_days', 'use_sudo_for_restore', 'k8s_mode']
+     'aws_cli_path', 'kms_id', 'backup_grace_period_in_days', 'use_sudo_for_restore', 'k8s_mode', 'read_timeout']
 )
 
 CassandraConfig = collections.namedtuple(
@@ -116,7 +116,8 @@ def _build_default_config():
         'fqdn': socket.getfqdn(),
         'region': 'default',
         'backup_grace_period_in_days': 10,
-        'use_sudo_for_restore': 'True'
+        'use_sudo_for_restore': 'True',
+        'read_timeout': 60
     }
 
     config['logging'] = {
diff --git a/medusa/storage/azure_storage.py b/medusa/storage/azure_storage.py
index e929b339..209b3d55 100644
--- a/medusa/storage/azure_storage.py
+++ b/medusa/storage/azure_storage.py
@@ -56,6 +56,8 @@ def __init__(self, config):
         logging.getLogger('azure.core.pipeline.policies.http_logging_policy').setLevel(logging.WARNING)
         logging.getLogger('chardet.universaldetector').setLevel(logging.WARNING)
 
+        self.read_timeout = int(config.read_timeout)
+
         super().__init__(config)
 
     def _make_blob_service_url(self, account_name, config):
@@ -85,7 +87,10 @@ async def _disconnect(self):
 
     async def _list_blobs(self, prefix=None) -> t.List[AbstractBlob]:
         blobs = []
-        async for b_props in self.azure_container_client.list_blobs(name_starts_with=str(prefix)):
+        async for b_props in self.azure_container_client.list_blobs(
+            name_starts_with=str(prefix),
+            timeout=self.read_timeout
+        ):
             blobs.append(AbstractBlob(
                 b_props.name,
                 b_props.size,
@@ -150,6 +155,7 @@ async def _download_blob(self, src: str, dest: str):
             downloader = await self.azure_container_client.download_blob(
                 blob=object_key,
                 max_concurrency=workers,
+                timeout=self.read_timeout,
             )
             Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             await downloader.readinto(open(file_path, "wb"))
@@ -206,6 +212,7 @@ async def _read_blob_as_bytes(self, blob: AbstractBlob) -> bytes:
         downloader = await self.azure_container_client.download_blob(
             blob=blob.name,
             max_concurrency=1,
+            timeout=self.read_timeout,
         )
         return await downloader.readall()
 
diff --git a/medusa/storage/google_storage.py b/medusa/storage/google_storage.py
index 01fc2863..2fae757d 100644
--- a/medusa/storage/google_storage.py
+++ b/medusa/storage/google_storage.py
@@ -49,6 +49,8 @@ def __init__(self, config):
 
         logging.getLogger('gcloud.aio.storage.storage').setLevel(logging.WARNING)
 
+        self.read_timeout = int(config.read_timeout)
+
         super().__init__(config)
 
     def connect(self):
@@ -94,7 +96,8 @@ async def _paginate_objects(self, prefix=None):
             # fetch a page
             page = await self.gcs_storage.list_objects(
                 bucket=self.bucket_name,
-                params=params
+                params=params,
+                timeout=self.read_timeout,
             )
 
             # got nothing, return from the function
@@ -151,7 +154,7 @@ async def _download_blob(self, src: str, dest: str):
             stream = await self.gcs_storage.download_stream(
                 bucket=self.bucket_name,
                 object_name=object_key,
-                timeout=-1,
+                timeout=self.read_timeout if self.read_timeout is not None else -1,
             )
             Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, 'wb') as f:
@@ -171,6 +174,7 @@ async def _stat_blob(self, object_key: str) -> AbstractBlob:
         blob = await self.gcs_storage.download_metadata(
             bucket=self.bucket_name,
             object_name=object_key,
+            timeout=self.read_timeout,
         )
         return AbstractBlob(
             blob['name'],
@@ -233,7 +237,7 @@ async def _read_blob_as_bytes(self, blob: AbstractBlob) -> bytes:
             bucket=self.bucket_name,
             object_name=blob.name,
             session=self.session,
-            timeout=-1
+            timeout=self.read_timeout if self.read_timeout is not None else -1,
         )
         return content
 
diff --git a/medusa/storage/s3_base_storage.py b/medusa/storage/s3_base_storage.py
index 2c250f85..24d51af7 100644
--- a/medusa/storage/s3_base_storage.py
+++ b/medusa/storage/s3_base_storage.py
@@ -136,7 +136,8 @@ def connect(self):
             region_name=self.credentials.region,
             signature_version='v4',
             tcp_keepalive=True,
-            max_pool_connections=max_pool_size
+            max_pool_connections=max_pool_size,
+            read_timeout=int(self.config.read_timeout),
         )
         if self.credentials.access_key_id is not None:
             self.s3_client = boto3.client(
diff --git a/tests/storage/azure_storage_test.py b/tests/storage/azure_storage_test.py
index 7809eac6..02467a53 100644
--- a/tests/storage/azure_storage_test.py
+++ b/tests/storage/azure_storage_test.py
@@ -33,6 +33,7 @@ def test_make_connection_url(self):
             'concurrent_transfers': '1',
             'host': None,
             'port': None,
+            'read_timeout': 60,
         })
         azure_storage = AzureStorage(config)
         self.assertEqual(
@@ -52,6 +53,7 @@ def test_make_connection_url_with_custom_host(self):
             'concurrent_transfers': '1',
             'host': 'custom.host.net',
             'port': None,
+            'read_timeout': 60,
         })
         azure_storage = AzureStorage(config)
         self.assertEqual(
@@ -71,6 +73,7 @@ def test_make_connection_url_with_custom_host_port(self):
             'concurrent_transfers': '1',
             'host': 'custom.host.net',
             'port': 123,
+            'read_timeout': 60,
         })
         azure_storage = AzureStorage(config)
         self.assertEqual(
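
Note on the option's round trip (an illustrative sketch, not part of the patch): the INI value arrives as a string, _build_default_config() supplies 60 when the option is absent, and each backend coerces the value with int(), so both forms are accepted. A minimal standalone demonstration using plain configparser rather than Medusa's actual loader:

    import configparser

    # Hypothetical [storage] section; only read_timeout matters here.
    INI = "[storage]\nstorage_provider = s3\nread_timeout = 120\n"

    parser = configparser.ConfigParser(interpolation=None)
    parser.read_string(INI)

    # Mirrors the patch: the default 60 applies when the option is absent,
    # and int() accepts both the string "120" from the file and the
    # integer default.
    read_timeout = int(parser['storage'].get('read_timeout', 60))
    print(read_timeout)  # -> 120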
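For the S3 backend, the value feeds botocore's Config(read_timeout=...), which bounds how long a read on an already-established connection may block before botocore raises ReadTimeoutError; before this patch, botocore's built-in 60-second default applied regardless of Medusa's configuration. A standalone sketch of the equivalent client setup (values mirror the patched defaults; credentials and region are left to the environment):

    import boto3
    from botocore.config import Config

    # Same knobs the patched connect() passes to botocore.
    s3 = boto3.client(
        's3',
        config=Config(
            signature_version='v4',
            tcp_keepalive=True,
            read_timeout=60,  # seconds, matching the new config default
        ),
    )
    # A response read that stalls longer than read_timeout now fails with
    # botocore.exceptions.ReadTimeoutError instead of hanging indefinitely.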