From 5db2d65d6e066fb636ed36d13b69b4fd0751b0f2 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 1 Sep 2024 13:20:30 +0200 Subject: [PATCH 01/26] intial implementation of http --- cloudpathlib/__init__.py | 8 +- cloudpathlib/cloudpath.py | 15 ++- cloudpathlib/http/__init__.py | 7 ++ cloudpathlib/http/httpclient.py | 160 ++++++++++++++++++++++++++ cloudpathlib/http/httppath.py | 130 +++++++++++++++++++++ requirements-dev.txt | 1 + tests/conftest.py | 44 +++++++ tests/http_fixtures.py | 90 +++++++++++++++ tests/test_client.py | 5 + tests/test_cloudpath_file_io.py | 39 +++++-- tests/test_cloudpath_instantiation.py | 14 ++- tests/test_cloudpath_manipulation.py | 63 ++++++---- tests/test_cloudpath_upload_copy.py | 19 +-- 13 files changed, 542 insertions(+), 53 deletions(-) create mode 100644 cloudpathlib/http/__init__.py create mode 100644 cloudpathlib/http/httpclient.py create mode 100644 cloudpathlib/http/httppath.py create mode 100644 tests/http_fixtures.py diff --git a/cloudpathlib/__init__.py b/cloudpathlib/__init__.py index da4fe28e..c51fc45e 100644 --- a/cloudpathlib/__init__.py +++ b/cloudpathlib/__init__.py @@ -4,9 +4,11 @@ from .azure.azblobclient import AzureBlobClient from .azure.azblobpath import AzureBlobPath from .cloudpath import CloudPath, implementation_registry -from .s3.s3client import S3Client -from .gs.gspath import GSPath from .gs.gsclient import GSClient +from .gs.gspath import GSPath +from .http.httpclient import HttpClient +from .http.httppath import HttpPath +from .s3.s3client import S3Client from .s3.s3path import S3Path @@ -27,6 +29,8 @@ "implementation_registry", "GSClient", "GSPath", + "HttpClient", + "HttpPath", "S3Client", "S3Path", ] diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 5845e929..1e93e89d 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -27,7 +27,6 @@ Generator, List, Optional, - Sequence, Tuple, Type, TYPE_CHECKING, @@ -299,11 +298,11 @@ def __setstate__(self, state: 
Dict[str, Any]) -> None: @property def _no_prefix(self) -> str: - return self._str[len(self.cloud_prefix) :] + return self._str[len(self.anchor) :] @property def _no_prefix_no_drive(self) -> str: - return self._str[len(self.cloud_prefix) + len(self.drive) :] + return self._str[len(self.anchor) + len(self.drive) :] @overload @classmethod @@ -909,9 +908,9 @@ def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath: # absolute) if not isinstance(other, CloudPath): raise ValueError(f"{self} is a cloud path, but {other} is not") - if self.cloud_prefix != other.cloud_prefix: + if self.anchor != other.anchor: raise ValueError( - f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path" + f"{self} is a {self.anchor} path, but {other} is a {other.anchor} path" ) kwargs = dict(walk_up=walk_up) @@ -969,7 +968,7 @@ def parent(self) -> Self: return self._dispatch_to_path("parent") @property - def parents(self) -> Sequence[Self]: + def parents(self) -> Tuple[Self, ...]: return self._dispatch_to_path("parents") @property @@ -1258,8 +1257,8 @@ def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self: path = path[1:] # add prefix/anchor if it is not already - if not path.startswith(self.cloud_prefix): - path = f"{self.cloud_prefix}{path}" + if not path.startswith(self.anchor): + path = f"{self.anchor}{path}" return self.client.CloudPath(path) diff --git a/cloudpathlib/http/__init__.py b/cloudpathlib/http/__init__.py new file mode 100644 index 00000000..5ad785be --- /dev/null +++ b/cloudpathlib/http/__init__.py @@ -0,0 +1,7 @@ +from .httpclient import HttpClient +from .httppath import HttpPath + +__all__ = [ + "HttpClient", + "HttpPath", +] diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py new file mode 100644 index 00000000..ea76d006 --- /dev/null +++ b/cloudpathlib/http/httpclient.py @@ -0,0 +1,160 @@ +from datetime import datetime +import os +import re +import urllib.request +import urllib.parse 
+import urllib.error +from pathlib import Path +from typing import Iterable, Optional, Tuple, Union, Callable +import shutil +import mimetypes +import urllib.response + +import pytz + +from cloudpathlib.client import Client, register_client_class +from cloudpathlib.enums import FileCacheMode + +from .httppath import HttpPath + + +@register_client_class("http") +class HttpClient(Client): + def __init__( + self, + file_cache_mode: Optional[Union[str, FileCacheMode]] = None, + local_cache_dir: Optional[Union[str, os.PathLike]] = None, + content_type_method: Optional[Callable] = mimetypes.guess_type, + auth: Optional[urllib.request.BaseHandler] = None, + custom_list_page_parser: Optional[Callable[[str], Iterable[str]]] = None, + ): + super().__init__(file_cache_mode, local_cache_dir, content_type_method) + self.auth = auth + + if self.auth is None: + self.opener = urllib.request.build_opener() + else: + self.openener = urllib.request.build_opener(self.auth) + + self.custom_list_page_parser = custom_list_page_parser + + def _get_metadata(self, cloud_path: HttpPath) -> dict: + with self.opener.open(cloud_path.as_url()) as response: + last_modified = response.headers.get("Last-Modified", None) + + if last_modified is not None: + # per https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified + last_modified = datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z") + + # should always be utc https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified#gmt + last_modified = last_modified.replace(tzinfo=pytz.UTC) + + return { + "size": int(response.headers.get("Content-Length", 0)), + "last_modified": last_modified, + "content_type": response.headers.get("Content-Type", None), + } + + def _download_file(self, cloud_path: HttpPath, local_path: Union[str, os.PathLike]) -> Path: + local_path = Path(local_path) + with self.opener.open(cloud_path.as_url()) as response: + with open(local_path, "wb") as out_file: + shutil.copyfileobj(response, 
out_file) + return local_path + + def _exists(self, cloud_path: HttpPath) -> bool: + request = urllib.request.Request(cloud_path.as_url(), method="HEAD") + try: + with self.opener.open(request) as response: + return response.status == 200 + except (urllib.error.HTTPError, urllib.error.URLError) as e: + if isinstance(e, urllib.error.URLError) or e.code == 404: + return False + raise + + def _move_file(self, src: HttpPath, dst: HttpPath, remove_src: bool = True) -> HttpPath: + self._upload_file(src, dst) + if remove_src: + self._remove(src) + return dst + + def _remove(self, cloud_path: HttpPath, missing_ok: bool = True) -> None: + request = urllib.request.Request(cloud_path.as_url(), method="DELETE") + try: + with self.opener.open(request) as response: + if response.status != 204: + raise Exception(f"Failed to delete {cloud_path}.") + except urllib.error.HTTPError as e: + if e.code == 404 and missing_ok: + pass + else: + raise FileNotFoundError(f"Failed to delete {cloud_path}.") + + def _list_dir(self, cloud_path: HttpPath, recursive: bool) -> Iterable[Tuple[HttpPath, bool]]: + try: + with self.opener.open(cloud_path.as_url()) as response: + # Parse the directory listing + for path, is_dir in self._parse_list_dir_response( + response.read().decode(), base_url=str(cloud_path) + ): + yield path, is_dir + + # If it's a directory and recursive is True, list the contents of the directory + if recursive and is_dir: + yield from self._list_dir(path, recursive=True) + + except: # noqa E722 + raise NotImplementedError( + "Unable to parse response as a listing of files; please provide a custom parser as `custom_list_page_parser`." 
+ ) + + def _upload_file(self, local_path: Union[str, os.PathLike], cloud_path: HttpPath) -> HttpPath: + local_path = Path(local_path) + if self.content_type_method is not None: + content_type, _ = self.content_type_method(local_path) + + headers = {"Content-Type": content_type or "application/octet-stream"} + + with open(local_path, "rb") as file_data: + request = urllib.request.Request( + cloud_path.as_url(), data=file_data.read(), method="PUT", headers=headers + ) + with self.opener.open(request) as response: + if response.status != 201 and response.status != 200: + raise Exception(f"Failed to upload {local_path} to {cloud_path}.") + return cloud_path + + def _get_public_url(self, cloud_path: HttpPath) -> str: + return cloud_path.as_url() + + def _generate_presigned_url(self, cloud_path: HttpPath, expire_seconds: int = 60 * 60) -> str: + raise NotImplementedError("Presigned URLs are not supported using urllib.") + + def _parse_list_dir_response( + self, response: str, base_url: str + ) -> Iterable[Tuple[HttpPath, bool]]: + # Ensure base_url ends with a trailing slash so joining works + if not base_url.endswith("/"): + base_url += "/" + + def _simple_links(html: str) -> Iterable[str]: + return re.findall(r' None: + request = urllib.request.Request(url.as_url(), method=method, **kwargs) + with self.opener.open(request) as response: + return response + + +HttpClient.HttpPath = HttpClient.CloudPath # type: ignore diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py new file mode 100644 index 00000000..b559f115 --- /dev/null +++ b/cloudpathlib/http/httppath.py @@ -0,0 +1,130 @@ +from pathlib import PurePosixPath +from typing import Tuple, Union, Optional + +import os +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import TYPE_CHECKING + +from ..cloudpath import CloudPath, NoStatError, register_path_class + + +if TYPE_CHECKING: + from .httpclient import HttpClient + + +@register_path_class("http") +class 
HttpPath(CloudPath): + cloud_prefix = "http://" + client: "HttpClient" + + def __init__( + self, + cloud_path: Union[str, "HttpPath"], + client: Optional["HttpClient"] = None, + ) -> None: + super().__init__(cloud_path, client) + + self._path = ( + PurePosixPath(self._url.path) + if self._url.path.startswith("/") + else PurePosixPath(f"/{self._url.path}") + ) + + @property + def drive(self) -> str: + # For HTTP paths, no drive; use .anchor for scheme + netloc + return self._url.netloc + + @property + def anchor(self) -> str: + return f"{self._url.scheme}://{self._url.netloc}/" + + @property + def _no_prefix_no_drive(self) -> str: + # netloc appears in anchor and drive for httppath; so don't double count + return self._str[len(self.anchor) - 1 :] + + def is_dir(self) -> bool: + if not self.exists(): + return False + + # HTTP doesn't really have directories, but some servers might list files if treated as such + # Here we'll assume paths without are dirs + return self._path.suffix == "" + + def is_file(self) -> bool: + if not self.exists(): + return False + + # HTTP doesn't have a direct file check, but we assume if it has a suffix, it's a file + return self._path.suffix != "" + + def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: + pass # no-op for HTTP Paths + + def touch(self, exist_ok: bool = True) -> None: + if self.exists(): + if not exist_ok: + raise FileExistsError(f"File already exists: {self}") + + raise NotImplementedError( + "Touch not implemented for existing HTTP files since we can't update the modified time." 
+ ) + else: + empty_file = Path(TemporaryDirectory().name) / "empty_file.txt" + empty_file.parent.mkdir(parents=True, exist_ok=True) + empty_file.write_text("") + self.client._upload_file(empty_file, self) + + def stat(self, follow_symlinks: bool = True) -> os.stat_result: + try: + meta = self.client._get_metadata(self) + except: # noqa E722 + raise NoStatError(f"Could not get metadata for {self}") + + return os.stat_result( + ( # type: ignore + None, # mode + None, # ino + self.cloud_prefix, # dev, + None, # nlink, + None, # uid, + None, # gid, + meta.get("size", 0), # size, + None, # atime, + meta.get("last_modified", 0).timestamp(), # mtime, + None, # ctime, + ) + ) + + def as_url(self, presign: bool = False, expire_seconds: int = 60 * 60) -> str: + if presign: + raise NotImplementedError("Presigning not supported for HTTP paths") + + return ( + self._url.geturl() + ) # recreate from what was initialized so we have the same query params, etc. + + @property + def name(self) -> str: + return self._path.name + + @property + def parents(self) -> Tuple["HttpPath", ...]: + return super().parents + (self._new_cloudpath(""),) + + def get(self, **kwargs): + return self.client.request(self, "GET", **kwargs) + + def put(self, **kwargs): + return self.client.request(self, "PUT", **kwargs) + + def post(self, **kwargs): + return self.client.request(self, "POST", **kwargs) + + def delete(self, **kwargs): + return self.client.request(self, "DELETE", **kwargs) + + def head(self, **kwargs): + return self.client.request(self, "HEAD", **kwargs) diff --git a/requirements-dev.txt b/requirements-dev.txt index ac0544f5..80ba2997 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -34,4 +34,5 @@ tabulate tenacity tqdm typer +types-pytz wheel diff --git a/tests/conftest.py b/tests/conftest.py index 301ffe87..71e90f19 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ import shutil from tempfile import TemporaryDirectory from typing import Dict, Optional 
+from urllib.parse import urlparse from azure.storage.blob import BlobServiceClient from azure.storage.filedatalake import ( @@ -18,6 +19,8 @@ from cloudpathlib import AzureBlobClient, AzureBlobPath, GSClient, GSPath, S3Client, S3Path from cloudpathlib.cloudpath import implementation_registry +from cloudpathlib.http.httpclient import HttpClient +from cloudpathlib.http.httppath import HttpPath from cloudpathlib.local import ( local_azure_blob_implementation, LocalAzureBlobClient, @@ -42,6 +45,8 @@ from .mock_clients.mock_s3 import mocked_session_class_factory, DEFAULT_S3_BUCKET_NAME +from .http_fixtures import http_server # noqa: F401 + if os.getenv("USE_LIVE_CLOUD") == "1": load_dotenv(find_dotenv()) @@ -469,6 +474,44 @@ def local_s3_rig(request, monkeypatch, assets_dir): rig.client_class.reset_default_storage_dir() # reset local storage directory +class HttpProviderTestRig(CloudProviderTestRig): + def create_cloud_path(self, path: str, client=None): + """Http version needs to include netloc as well""" + if client: + return client.CloudPath( + cloud_path=f"{self.path_class.cloud_prefix}{self.drive}/{self.test_dir}/{path}" + ) + else: + return self.path_class( + cloud_path=f"{self.path_class.cloud_prefix}{self.drive}/{self.test_dir}/{path}" + ) + + +@fixture() +def http_rig(request, assets_dir, http_server): # noqa: F811 + test_dir = create_test_dir_name(request) + + host, server_dir = http_server + drive = urlparse(host).netloc + + # copy test assets + shutil.copytree(assets_dir, server_dir / test_dir) + + rig = CloudProviderTestRig( + path_class=HttpPath, + client_class=HttpClient, + drive=drive, + test_dir=test_dir, + ) + + rig.http_server_dir = server_dir + + yield rig + + rig.client_class._default_client = None # reset default client + shutil.rmtree(server_dir) + + # create azure fixtures for both blob and gen2 storage azure_rigs = fixture_union( "azure_rigs", @@ -489,6 +532,7 @@ def local_s3_rig(request, monkeypatch, assets_dir): local_azure_rig, local_s3_rig, 
local_gs_rig, + http_rig, ], ) diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py new file mode 100644 index 00000000..2b0a9f88 --- /dev/null +++ b/tests/http_fixtures.py @@ -0,0 +1,90 @@ +from datetime import datetime +from functools import partial +from http.server import HTTPServer, SimpleHTTPRequestHandler +from io import BytesIO +import os +from pathlib import Path +import shutil +import threading +import time +from urllib.request import urlopen + +from pytest import fixture + + +class TestHTTPRequestHandler(SimpleHTTPRequestHandler): + """Also allows PUT and DELETE requests for testing.""" + + def do_PUT(self): + length = int(self.headers["Content-Length"]) + path = Path(self.translate_path(self.path)) + + if path.is_dir(): + path.mkdir(parents=True, exist_ok=True) + else: + path.parent.mkdir(parents=True, exist_ok=True) + + with path.open("wb") as f: + f.write(self.rfile.read(length)) + + now = datetime.now().timestamp() + os.utime(path, (now, now)) + + self.send_response(201) + self.end_headers() + + def do_DELETE(self): + path = Path(self.translate_path(self.path)) + + try: + if path.is_dir(): + shutil.rmtree(path) + else: + path.unlink() + self.send_response(204) + except FileNotFoundError: + self.send_response(404) + + self.end_headers() + + def list_directory(self, path: str | os.PathLike[str]) -> BytesIO | None: + return super().list_directory(path) + + +@fixture(scope="module") +def http_server(tmp_path_factory, worker_id): + hostname = "localhost" + port = ( + 9077 + int(worker_id.lstrip("gw")) if worker_id != "master" else 0 + ) # don't collide if tests running in parallel with multiple servers + + # Create a temporary directory to serve files from + server_dir = tmp_path_factory.mktemp("server_files").resolve() + server_dir.mkdir(exist_ok=True) + + # Function to start the server + def start_server(): + handler = partial(TestHTTPRequestHandler, directory=str(server_dir)) + httpd = HTTPServer((hostname, port), handler) + 
httpd.serve_forever() + + # Start the server in a separate thread + server_thread = threading.Thread(target=start_server, daemon=True) + server_thread.start() + + # Wait for the server to start + for _ in range(10): + try: + urlopen(f"http://{hostname}:{port}") + break + except Exception: + time.sleep(0.1) + + yield f"http://{hostname}:{port}", server_dir + + # Stop the server by exiting the thread + server_thread.join(0) + + # Clean up the temporary directory if it still exists + if server_dir.exists(): + shutil.rmtree(server_dir) diff --git a/tests/test_client.py b/tests/test_client.py index fd58535b..78505696 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -9,6 +9,7 @@ from cloudpathlib import CloudPath from cloudpathlib.client import register_client_class from cloudpathlib.cloudpath import implementation_registry, register_path_class +from cloudpathlib.http.httpclient import HttpClient from cloudpathlib.s3.s3client import S3Client from cloudpathlib.s3.s3path import S3Path @@ -96,6 +97,10 @@ def _test_write_content_type(suffix, expected, rig_ref, check=True): for suffix, content_type in mimes: _test_write_content_type(suffix, content_type, rig, check=False) + if rig.client_class is HttpClient: + # HTTP client doesn't support custom content types + return + # custom mime type method def my_content_type(path): # do lookup for content types I define; fallback to diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index d367e1ae..5da8c3fe 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -14,17 +14,25 @@ CloudPathNotImplementedError, DirectoryNotEmptyError, ) +from cloudpathlib.http.httpclient import HttpClient +from cloudpathlib.http.httppath import HttpPath def test_file_discovery(rig): p = rig.create_cloud_path("dir_0/file0_0.txt") assert p.exists() - p2 = rig.create_cloud_path("dir_0/not_a_file") + p2 = rig.create_cloud_path("dir_0/not_a_file_yet.file") assert not p2.exists() 
p2.touch() assert p2.exists() - p2.touch(exist_ok=True) + + if rig.client_class not in [HttpClient]: # not supported to touch existing + p2.touch(exist_ok=True) + else: + with pytest.raises(NotImplementedError): + p2.touch(exist_ok=True) + with pytest.raises(FileExistsError): p2.touch(exist_ok=False) p2.unlink(missing_ok=False) @@ -83,12 +91,12 @@ def glob_test_dirs(rig, tmp_path): def _make_glob_directory(root): (root / "dirB").mkdir() - (root / "dirB" / "fileB").write_text("fileB") + (root / "dirB" / "fileB.txt").write_text("fileB") (root / "dirC").mkdir() (root / "dirC" / "dirD").mkdir() - (root / "dirC" / "dirD" / "fileD").write_text("fileD") - (root / "dirC" / "fileC").write_text("fileC") - (root / "fileA").write_text("fileA") + (root / "dirC" / "dirD" / "fileD.txt").write_text("fileD") + (root / "dirC" / "fileC.txt").write_text("fileC") + (root / "fileA.txt").write_text("fileA") cloud_root = rig.create_cloud_path("glob-tests") cloud_root.mkdir() @@ -181,6 +189,9 @@ def test_walk(glob_test_dirs): def test_list_buckets(rig): + if rig.path_class in [HttpPath]: + return # no bucket listing for HTTP + # test we can list buckets buckets = list(rig.path_class(f"{rig.path_class.cloud_prefix}").iterdir()) assert len(buckets) > 0 @@ -349,7 +360,7 @@ def test_is_dir_is_file(rig, tmp_path): def test_file_read_writes(rig, tmp_path): p = rig.create_cloud_path("dir_0/file0_0.txt") - p2 = rig.create_cloud_path("dir_0/not_a_file") + p2 = rig.create_cloud_path("dir_0/not_a_file.txt") p3 = rig.create_cloud_path("") text = "lalala" * 10_000 @@ -367,16 +378,20 @@ def test_file_read_writes(rig, tmp_path): before_touch = datetime.now() sleep(1) - p.touch() - if not getattr(rig, "is_custom_s3", False): - # Our S3Path.touch implementation does not update mod time for MinIO - assert datetime.fromtimestamp(p.stat().st_mtime) > before_touch + + if rig.path_class not in [HttpPath]: # not supported to touch existing + p.touch() + + if not getattr(rig, "is_custom_s3", False): + # Our 
S3Path.touch implementation does not update mod time for MinIO + assert datetime.fromtimestamp(p.stat().st_mtime) > before_touch # no-op if not getattr(rig, "is_adls_gen2", False): p.mkdir() - assert p.etag is not None + if rig.path_class not in [HttpPath]: # not supported to touch existing + assert p.etag is not None dest = rig.create_cloud_path("dir2/new_file0_0.txt") assert not dest.exists() diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index 4be6085c..15faf25f 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -7,6 +7,7 @@ from cloudpathlib import AzureBlobPath, CloudPath, GSPath, S3Path from cloudpathlib.exceptions import InvalidPrefixError, MissingDependenciesError +from cloudpathlib.http.httppath import HttpPath @pytest.mark.parametrize( @@ -45,6 +46,9 @@ def test_dispatch_error(): @pytest.mark.parametrize("path", ["b/k", "b/k", "b/k.file", "b/k", "b"]) def test_instantiation(rig, path): + if rig.path_class in [HttpPath]: + path = "example-url.com/" + path + # check two cases of prefix for prefix in [rig.cloud_prefix.lower(), rig.cloud_prefix.upper()]: expected = prefix + path @@ -52,13 +56,17 @@ def test_instantiation(rig, path): assert repr(p) == f"{rig.path_class.__name__}('{expected}')" assert str(p) == expected - assert p._no_prefix == expected.split("://", 1)[-1] + if rig.path_class in [HttpPath]: + assert p._no_prefix == path.replace("example-url.com/", "") + assert str(p._path) == path.replace("example-url.com", "") + + else: + assert p._no_prefix == expected.split("://", 1)[-1] + assert str(p._path) == expected.split(":/", 1)[-1] assert p._url.scheme == expected.split("://", 1)[0].lower() assert p._url.netloc == expected.split("://", 1)[-1].split("/")[0] - assert str(p._path) == expected.split(":/", 1)[-1] - def test_default_client_lazy(rig): cp = rig.path_class(rig.cloud_prefix + "testing/file.txt") diff --git a/tests/test_cloudpath_manipulation.py 
b/tests/test_cloudpath_manipulation.py index b9e70669..94d38191 100644 --- a/tests/test_cloudpath_manipulation.py +++ b/tests/test_cloudpath_manipulation.py @@ -5,6 +5,7 @@ import pytest from cloudpathlib import CloudPath +from cloudpathlib.http.httppath import HttpPath def test_properties(rig): @@ -84,16 +85,27 @@ def test_joins(rig): if sys.version_info >= (3, 12): assert rig.create_cloud_path("a/b/c/d").match("A/*/C/D", case_sensitive=False) - assert rig.create_cloud_path("a/b/c/d").anchor == rig.cloud_prefix + if rig.path_class not in [HttpPath]: + assert rig.create_cloud_path("a/b/c/d").anchor == rig.cloud_prefix + assert rig.create_cloud_path("a/b/c/d").parent == rig.create_cloud_path("a/b/c") - assert rig.create_cloud_path("a/b/c/d").parents == ( - rig.create_cloud_path("a/b/c"), - rig.create_cloud_path("a/b"), - rig.create_cloud_path("a"), - rig.path_class(f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}"), - rig.path_class(f"{rig.cloud_prefix}{rig.drive}"), - ) + if rig.path_class not in [HttpPath]: + assert rig.create_cloud_path("a/b/c/d").parents == ( + rig.create_cloud_path("a/b/c"), + rig.create_cloud_path("a/b"), + rig.create_cloud_path("a"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}"), + ) + else: + assert rig.create_cloud_path("a/b/c/d").parents == ( + rig.create_cloud_path("a/b/c"), + rig.create_cloud_path("a/b"), + rig.create_cloud_path("a"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}/"), + ) assert rig.create_cloud_path("a").joinpath("b", "c") == rig.create_cloud_path("a/b/c") assert rig.create_cloud_path("a").joinpath(PurePosixPath("b"), "c") == rig.create_cloud_path( @@ -107,21 +119,32 @@ def test_joins(rig): == f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}/a/b/c" ) - assert rig.create_cloud_path("a/b/c/d").parts == ( - rig.cloud_prefix, - rig.drive, - rig.test_dir, - "a", - "b", - "c", - "d", - ) + 
if rig.path_class in [HttpPath]: + assert rig.create_cloud_path("a/b/c/d").parts == ( + rig.cloud_prefix + rig.drive + "/", + rig.test_dir, + "a", + "b", + "c", + "d", + ) + else: + assert rig.create_cloud_path("a/b/c/d").parts == ( + rig.cloud_prefix, + rig.drive, + rig.test_dir, + "a", + "b", + "c", + "d", + ) def test_with_segments(rig): - assert rig.create_cloud_path("a/b/c/d").with_segments("x", "y", "z") == rig.client_class( - **rig.required_client_kwargs - ).CloudPath(f"{rig.cloud_prefix}x/y/z") + to_test = rig.create_cloud_path("a/b/c/d").with_segments("x", "y", "z") + assert to_test == rig.client_class(**rig.required_client_kwargs).CloudPath( + f"{to_test.anchor}x/y/z" + ) def test_is_junction(rig): diff --git a/tests/test_cloudpath_upload_copy.py b/tests/test_cloudpath_upload_copy.py index acf5e5ec..6e447f49 100644 --- a/tests/test_cloudpath_upload_copy.py +++ b/tests/test_cloudpath_upload_copy.py @@ -4,6 +4,7 @@ import pytest +from cloudpathlib.http.httppath import HttpPath from cloudpathlib.local import LocalGSPath, LocalS3Path, LocalS3Client from cloudpathlib.exceptions import ( CloudPathFileExistsError, @@ -64,12 +65,12 @@ def test_upload_from_file(rig, upload_assets_dir): assert p.read_text() == "Hello from 2" # to file, file exists and is newer - p.touch() + p.write_text("newer") with pytest.raises(OverwriteNewerCloudError): p.upload_from(upload_assets_dir / "upload_1.txt") # to file, file exists and is newer; overwrite - p.touch() + p.write_text("even newer") sleep(1.1) p.upload_from(upload_assets_dir / "upload_1.txt", force_overwrite_to_cloud=True) assert p.exists() @@ -100,12 +101,12 @@ def test_upload_from_dir(rig, upload_assets_dir): # a newer file exists on cloud sleep(1) - (p / "upload_1.txt").touch() + (p / "upload_1.txt").write_text("newer") with pytest.raises(OverwriteNewerCloudError): p.upload_from(upload_assets_dir) # force overwrite - (p / "upload_1.txt").touch() + (p / "upload_1.txt").write_text("even newer") (p / 
"upload_2.txt").unlink() p.upload_from(upload_assets_dir, force_overwrite_to_cloud=True) assert assert_mirrored(p, upload_assets_dir) @@ -135,9 +136,11 @@ def test_copy(rig, upload_assets_dir, tmpdir): # cloud to cloud -> make sure no local cache p_new = p.copy(p.parent / "new_upload_1.txt") assert p_new.exists() - assert not p_new._local.exists() # cache should never have been downloaded - assert not p._local.exists() # cache should never have been downloaded - assert p_new.read_text() == "Hello from 1" + + if rig.path_class not in [HttpPath]: + assert not p_new._local.exists() # cache should never have been downloaded + assert not p._local.exists() # cache should never have been downloaded + assert p_new.read_text() == "Hello from 1" # cloud to cloud path as string cloud_dest = str(p.parent / "new_upload_0.txt") @@ -153,7 +156,7 @@ def test_copy(rig, upload_assets_dir, tmpdir): assert p_new.read_text() == "Hello from 1" # cloud to cloud overwrite - p_new.touch() + p_new.write_text("p_new") with pytest.raises(OverwriteNewerCloudError): p_new = p.copy(p_new) From 8745ef48a7f22d142ef8d8a0c97c8b8d8a8edd5c Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 1 Sep 2024 13:28:26 +0200 Subject: [PATCH 02/26] remove unused func --- tests/http_fixtures.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index 2b0a9f88..060beb94 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -1,7 +1,6 @@ from datetime import datetime from functools import partial from http.server import HTTPServer, SimpleHTTPRequestHandler -from io import BytesIO import os from pathlib import Path import shutil @@ -47,9 +46,6 @@ def do_DELETE(self): self.end_headers() - def list_directory(self, path: str | os.PathLike[str]) -> BytesIO | None: - return super().list_directory(path) - @fixture(scope="module") def http_server(tmp_path_factory, worker_id): From 0d9a95fe15f0770b14a00930cb31f28326a138c4 Mon Sep 17 00:00:00 2001 From: Peter 
Bull Date: Mon, 16 Sep 2024 14:56:46 -0400 Subject: [PATCH 03/26] add https support --- cloudpathlib/http/httpclient.py | 14 ++++- cloudpathlib/http/httppath.py | 5 ++ tests/conftest.py | 42 +++++++++++++- tests/http_fixtures.py | 91 +++++++++++++++++++++++++------ tests/test_http.py | 36 ++++++++++++ tests/utilities/insecure-test.csr | 27 +++++++++ tests/utilities/insecure-test.key | 52 ++++++++++++++++++ tests/utilities/insecure-test.pem | 30 ++++++++++ 8 files changed, 273 insertions(+), 24 deletions(-) create mode 100644 tests/test_http.py create mode 100644 tests/utilities/insecure-test.csr create mode 100644 tests/utilities/insecure-test.key create mode 100644 tests/utilities/insecure-test.pem diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index ea76d006..1cb7dc2c 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -34,7 +34,7 @@ def __init__( if self.auth is None: self.opener = urllib.request.build_opener() else: - self.openener = urllib.request.build_opener(self.auth) + self.opener = urllib.request.build_opener(self.auth) self.custom_list_page_parser = custom_list_page_parser @@ -103,9 +103,9 @@ def _list_dir(self, cloud_path: HttpPath, recursive: bool) -> Iterable[Tuple[Htt if recursive and is_dir: yield from self._list_dir(path, recursive=True) - except: # noqa E722 + except Exception as e: # noqa E722 raise NotImplementedError( - "Unable to parse response as a listing of files; please provide a custom parser as `custom_list_page_parser`." + f"Unable to parse response as a listing of files; please provide a custom parser as `custom_list_page_parser`. 
Error raised: {e}" ) def _upload_file(self, local_path: Union[str, os.PathLike], cloud_path: HttpPath) -> HttpPath: @@ -158,3 +158,11 @@ def request(self, url: HttpPath, method: str, **kwargs) -> None: HttpClient.HttpPath = HttpClient.CloudPath # type: ignore + + +@register_client_class("https") +class HttpsClient(HttpClient): + pass + + +HttpsClient.HttpsPath = HttpsClient.CloudPath # type: ignore diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index b559f115..e8953247 100644 --- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -128,3 +128,8 @@ def delete(self, **kwargs): def head(self, **kwargs): return self.client.request(self, "HEAD", **kwargs) + + +@register_path_class("https") +class HttpsPath(HttpPath): + cloud_prefix: str = "https://" diff --git a/tests/conftest.py b/tests/conftest.py index 71e90f19..e5c47211 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,11 @@ import os from pathlib import Path, PurePosixPath import shutil +import ssl from tempfile import TemporaryDirectory from typing import Dict, Optional from urllib.parse import urlparse +from urllib.request import HTTPSHandler from azure.storage.blob import BlobServiceClient from azure.storage.filedatalake import ( @@ -19,8 +21,8 @@ from cloudpathlib import AzureBlobClient, AzureBlobPath, GSClient, GSPath, S3Client, S3Path from cloudpathlib.cloudpath import implementation_registry -from cloudpathlib.http.httpclient import HttpClient -from cloudpathlib.http.httppath import HttpPath +from cloudpathlib.http.httpclient import HttpClient, HttpsClient +from cloudpathlib.http.httppath import HttpPath, HttpsPath from cloudpathlib.local import ( local_azure_blob_implementation, LocalAzureBlobClient, @@ -45,7 +47,7 @@ from .mock_clients.mock_s3 import mocked_session_class_factory, DEFAULT_S3_BUCKET_NAME -from .http_fixtures import http_server # noqa: F401 +from .http_fixtures import http_server, https_server, utilities_dir # noqa: F401 if 
os.getenv("USE_LIVE_CLOUD") == "1": load_dotenv(find_dotenv()) @@ -505,6 +507,40 @@ def http_rig(request, assets_dir, http_server): # noqa: F811 ) rig.http_server_dir = server_dir + rig.client_class(**rig.required_client_kwargs).set_as_default_client() # set default client + + yield rig + + rig.client_class._default_client = None # reset default client + shutil.rmtree(server_dir) + + +@fixture() +def https_rig(request, assets_dir, https_server): # noqa: F811 + test_dir = create_test_dir_name(request) + + host, server_dir = https_server + drive = urlparse(host).netloc + + # copy test assets + shutil.copytree(assets_dir, server_dir / test_dir) + + skip_verify_ctx = ssl.SSLContext() + skip_verify_ctx.check_hostname = False + skip_verify_ctx.load_verify_locations(utilities_dir / "insecure-test.pem") + + rig = CloudProviderTestRig( + path_class=HttpsPath, + client_class=HttpsClient, + drive=drive, + test_dir=test_dir, + required_client_kwargs=dict( + auth=HTTPSHandler(context=skip_verify_ctx, check_hostname=False) + ), + ) + + rig.http_server_dir = server_dir + rig.client_class(**rig.required_client_kwargs).set_as_default_client() # set default client yield rig diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index 060beb94..754d4a65 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -4,6 +4,7 @@ import os from pathlib import Path import shutil +import ssl import threading import time from urllib.request import urlopen @@ -11,6 +12,9 @@ from pytest import fixture +utilities_dir = Path(__file__).parent / "utilities" + + class TestHTTPRequestHandler(SimpleHTTPRequestHandler): """Also allows PUT and DELETE requests for testing.""" @@ -47,40 +51,91 @@ def do_DELETE(self): self.end_headers() -@fixture(scope="module") -def http_server(tmp_path_factory, worker_id): - hostname = "localhost" - port = ( - 9077 + int(worker_id.lstrip("gw")) if worker_id != "master" else 0 - ) # don't collide if tests running in parallel with multiple servers +def 
_http_server( + root_dir, port, hostname="localhost", use_ssl=False, certfile=None, keyfile=None, threaded=True +): + root_dir.mkdir(exist_ok=True) - # Create a temporary directory to serve files from - server_dir = tmp_path_factory.mktemp("server_files").resolve() - server_dir.mkdir(exist_ok=True) + scheme = "http" if not use_ssl else "https" - # Function to start the server def start_server(): - handler = partial(TestHTTPRequestHandler, directory=str(server_dir)) + handler = partial(TestHTTPRequestHandler, directory=str(root_dir)) httpd = HTTPServer((hostname, port), handler) + + if use_ssl: + if not certfile or not keyfile: + raise ValueError("certfile and keyfile must be provided if `ssl=True`") + + context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + context.load_cert_chain(certfile=certfile, keyfile=keyfile) + context.check_hostname = False + httpd.socket = context.wrap_socket(httpd.socket, server_side=True) + httpd.serve_forever() - # Start the server in a separate thread - server_thread = threading.Thread(target=start_server, daemon=True) - server_thread.start() + if threaded: + server_thread = threading.Thread(target=start_server, daemon=True) + server_thread.start() + + else: + start_server() # Wait for the server to start for _ in range(10): try: - urlopen(f"http://{hostname}:{port}") + if use_ssl: + req_context = ssl.SSLContext() + req_context.check_hostname = False + req_context.verify_mode = ssl.CERT_NONE + else: + req_context = None + + urlopen(f"{scheme}://{hostname}:{port}", context=req_context) + break except Exception: time.sleep(0.1) - yield f"http://{hostname}:{port}", server_dir + return f"{scheme}://{hostname}:{port}", server_thread + + +@fixture(scope="module") +def http_server(tmp_path_factory, worker_id): + port = 9077 + ( + int(worker_id.lstrip("gw")) if worker_id != "master" else 0 + ) # don't collide if tests running in parallel with multiple servers + + server_dir = tmp_path_factory.mktemp("server_files").resolve() + + host, 
server_thread = _http_server(server_dir, port) + + yield host, server_dir + + server_thread.join(0) + + if server_dir.exists(): + shutil.rmtree(server_dir) + + +@fixture(scope="module") +def https_server(tmp_path_factory, worker_id): + port = 4443 + ( + int(worker_id.lstrip("gw")) if worker_id != "master" else 0 + ) # don't collide if tests running in parallel with multiple servers + + server_dir = tmp_path_factory.mktemp("server_files").resolve() + + host, server_thread = _http_server( + server_dir, + port, + use_ssl=True, + certfile=utilities_dir / "insecure-test.pem", + keyfile=utilities_dir / "insecure-test.key", + ) + + yield host, server_dir - # Stop the server by exiting the thread server_thread.join(0) - # Clean up the temporary directory if it still exists if server_dir.exists(): shutil.rmtree(server_dir) diff --git a/tests/test_http.py b/tests/test_http.py new file mode 100644 index 00000000..d75c523f --- /dev/null +++ b/tests/test_http.py @@ -0,0 +1,36 @@ +from tests.conftest import CloudProviderTestRig + + +def test_https(https_rig: CloudProviderTestRig): + """Basic tests for https; we run the full suite against the http_rig""" + existing_file = https_rig.create_cloud_path("dir_0/file0_0.txt") + + # existence and listing + assert existing_file.exists() + assert existing_file.parent.exists() + assert existing_file.name in [f.name for f in existing_file.parent.iterdir()] + + # root level checks + root = list(existing_file.parents)[-1] + assert root.exists() + assert len(list(root.iterdir())) > 0 + + # reading and wrirting + existing_file.write_text("Hello from 0") + assert existing_file.read_text() == "Hello from 0" + + # creating new files + not_existing_file = https_rig.create_cloud_path("dir_0/new_file.txt") + + assert not not_existing_file.exists() + + not_existing_file.upload_from(existing_file) + + assert not_existing_file.read_text() == "Hello from 0" + + # deleteing + not_existing_file.unlink() + assert not not_existing_file.exists() + + # 
metadata + assert existing_file.stat().st_mtime != 0 diff --git a/tests/utilities/insecure-test.csr b/tests/utilities/insecure-test.csr new file mode 100644 index 00000000..01625734 --- /dev/null +++ b/tests/utilities/insecure-test.csr @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEqjCCApICAQAwSDELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8w +DQYDVQQHDAZEZW52ZXIxFTATBgNVBAoMDGNsb3VkcGF0aGxpYjCCAiIwDQYJKoZI +hvcNAQEBBQADggIPADCCAgoCggIBAK5PvMKSP46Sf+8kEFEQdbMkcr9Oph1pzPK6 +yIRwWJK2CRTduLKYjzeivyS3roqKf2RK8CI3/aPRdMENADdAlUvRkfHYy1VyJey+ +9kuZ/DZfcmMXcUkNfiezv2PltGSL0eGYlWCCH2sAZc51LZrBwfnma1NAXiqDe0yD +36izMxIKgoGQ+DoatxNhQVYprDOi4VRW7qtw6V2Y/zqBFXctjBVeLyEm4c0MLdUQ +I/Ftw1mcttPmFWgfkGuOEeDdL7HFTbRj6PpzIC4mh1OSDONmv455XSQmia4egrDS +bpIrBOH8Al3fukD8R+Bwv0thWjVezFUQCxiynfASq6Lhb/kqTp93XcWw4DVaVPox +xGUDqDgfPq4XGxrKQR3ah94c/7jyhz4ih6td5KLf4hvExK77i3l61dgqW/86uj7g +gJEkWcAAY/SVnZneZSEClM82P/YyGavTTzw6ibi1n2zaRnRjuzEqiC6C92VoYlWF +F4S50o/gHhCHYWb775IIt8CAYqqryBHrN0r2vvJVU6lOmHTsnfbVv+XzGgNroBP9 +NsP1jDJA04XGMCq6DT8B5V5GO6kVn37Uqb5ER6RTBTxlcHh6oqtzdoHlVxMjdLwh +HPAug/DTZn4a1b9zTyK1YqSzNIM8eV/ckmySG5YMZJQovMHd7YVzB4hjq9kVupxa +bfPhjIHxAgMBAAGgHTAbBgkqhkiG9w0BCQcxDgwMY2xvdWRwYXRobGliMA0GCSqG +SIb3DQEBCwUAA4ICAQBeTRNKjo+ol3zuKfteuKt7+mLWzL3/qtUSCmCwt6t+NebN +ebkOwZA4HW0uUt/rdht58CJAvFri+DnkEidt/ldcg/CQ/tpWALZFdNa2z0hb+qEL +Q7wHO1QkwHG8/Q7yrcBNGSDsp4l7cH+8FQBcAVJxn++ixTe4dIiyscUdNRkXywsT +/UdQlK3oULR7Zv9k3nDErXTow/6QazjxtUyrfyuFdSDTAKJaKCOLt5NcJif/Ev3G +rUMJQElNz3W0P73ci+ueuihYdaveDx1vptO9VCBnwFOyTgjCYPS9g3MB8KIh5cJz +sj2J5J5tEUsyAa8ky4hvoLyP7GE29XvPA8pH1rOtQ++lmMzpP1vkPEGe0ezXrw2y +h4LBJXeMCg3/r3otEHnppI5PRTX3m1WlHyInpFIjets6VLDKjwENyreDmO5hIfRd +4ZxjxYzG97Tekoa+v9Y9qf3YCCGvbswOwfyj8hNheoMKv2f+rG2MwSPWfYlML/oT +4UA/C3o9Y7oa7H9FdEiTuXRgLcKUZqZJ0JuVhSbdPAAYSdrQE/EF06jyU6ZENxUu +0UJRwaXLETIIii99TUxyTmJTrvWAEbo5hpwfA1P6aaCLtWj0Qm6WSD3uLjU56yaX +6Q2kdspxv1BiT2TC4RO/ZH/8OwmSfe0dSg8jEOQf2+B0DcTPD+yHjo2hZWpT0A== +-----END CERTIFICATE REQUEST----- diff --git 
a/tests/utilities/insecure-test.key b/tests/utilities/insecure-test.key new file mode 100644 index 00000000..2c6f5248 --- /dev/null +++ b/tests/utilities/insecure-test.key @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQCuT7zCkj+Okn/v +JBBREHWzJHK/TqYdaczyusiEcFiStgkU3biymI83or8kt66Kin9kSvAiN/2j0XTB +DQA3QJVL0ZHx2MtVciXsvvZLmfw2X3JjF3FJDX4ns79j5bRki9HhmJVggh9rAGXO +dS2awcH55mtTQF4qg3tMg9+oszMSCoKBkPg6GrcTYUFWKawzouFUVu6rcOldmP86 +gRV3LYwVXi8hJuHNDC3VECPxbcNZnLbT5hVoH5BrjhHg3S+xxU20Y+j6cyAuJodT +kgzjZr+OeV0kJomuHoKw0m6SKwTh/AJd37pA/EfgcL9LYVo1XsxVEAsYsp3wEqui +4W/5Kk6fd13FsOA1WlT6McRlA6g4Hz6uFxsaykEd2ofeHP+48oc+IoerXeSi3+Ib +xMSu+4t5etXYKlv/Oro+4ICRJFnAAGP0lZ2Z3mUhApTPNj/2Mhmr0088Oom4tZ9s +2kZ0Y7sxKogugvdlaGJVhReEudKP4B4Qh2Fm+++SCLfAgGKqq8gR6zdK9r7yVVOp +Tph07J321b/l8xoDa6AT/TbD9YwyQNOFxjAqug0/AeVeRjupFZ9+1Km+REekUwU8 +ZXB4eqKrc3aB5VcTI3S8IRzwLoPw02Z+GtW/c08itWKkszSDPHlf3JJskhuWDGSU +KLzB3e2FcweIY6vZFbqcWm3z4YyB8QIDAQABAoICAAvBHOdCSd7CTLYZY/kkl4D8 +sbkO+nOumUPx3F+MynaKzGKw5lczESpz7EaFp4pl9zdbE7yFRVchK/LeIzXNeSz8 +ecZXeCGP3A/XH+cgyTUdJr0GmuEf4XIpyzKN+8qoO+0KfhjKb9GBuABZdYKaSh2z +S2kLRMnCaip3FKgKjzbclm1auA8F3E50CWc7rXPYhXk5RqQxG6gUoVaNRR+BnbVy +T4kl+7gv9/09NsBrIcqTQ97pKWf03zl7y3D8DfODkVhbQLAttfa/4V/Y0BRkuAEk +wYumvVh6IvGQRNxjK0F6y8U0EmNSLYt+yAQgyENIXEzobozXmFtU1dX/fZxNix7n +9fRXFBjOHVJNyW2nYgdVPeENbG+3u68baVsYG8sjsbk6XJyh9SMozEPaOCIQGWcr +pFz9yZb2rCZKvqlz09Qnhx1TKblMnUkC1VmVXLZOgylhJY12aueibNpaPw6LHPu1 +8JUnN0e2PIUjl4wWn6GPmkN+PSMm6khUTwYZx199fC9QFuxkij1qG5iQwvvsuMIH +gxvjO3XP2RAR01UNxhPPG+PgM6g3TBCfRd2B21toKgKNC9kzwsVLg251czxeTVh1 +2/uK0h06MkqHl11fJvBrWKLUhsnpgNqMSGusDIvf9vA39LvJSVxAcE550/dhdbY9 +VSjPnS5jcsK7JA4RgJ3rAoIBAQD09k5m8H+pky3+RMP0BP0apsJk5feLFl++FQc0 +otLgPzKSBWPqdhY/2R/UJKBQEc28CkWrtoe5wr51gM3t/ytU4xltYY+o6MyaAChD +rtwhm62Uu0X/CA1G9FTmjQJkCmNybwHzaqoHZ4kEax3WVGx0FC6Zxp2rl/wIDYuJ +z1tls+MMsVAoeoDCoxpRzSxWqY4xeEROuJoEOPdesPCkUqqCga1rT6+I8IUA7lmb +wjrOD7RB3RyEuM5oxfIJBuXZKlgHGjF1M0eCo9xjQFZPCG2lkoNn5UJofEz8Ktbv 
+Cazx6YvHSMYuowEsonbuz2C3er2ydyCNIuE+n1oLGBz9RmKjAoIBAQC2KnWvhfM4 +sz31lxKDg5xPszU7wozItTWzMXsg6hXi/wIFtFc7Y23IY8al5WiUaO9fV42nOUDB +gNk684lsKPR144XE5jxUSzVqM9DCLj931fHpuAkmxr6bkhxnDMK37QQ3YUib68ca +nBucqozaoS15sdgzTc25xNWgPuLHxq3wVBi1bELbSgLrrWVHr8hB3xTLF1WbCLxC +RlNlSc7EnJ841xx1mZmTwxsWG+bHfs6NjgD4zVqbjLSj5Orv8f0pD4AE8pyISlr+ ++rJTT6iaHQvCKMYv4Ynfa74YA168BBR+9IcstrIkdno25uHOXDb97V32ab5S3yFW +YlRE0lEHA+ZbAoIBADrPX2dLWfrmQOaIr9rHz9Q0MPt0Uofr6TSDa+VxKdt4kLWJ +4cEKdLEDeaa+3FYc0B3TAYMGIXOxk3Q2Zjo7RrXsh9BA2fKdYXGflAsb0fGnpHbO +tzFRR46/Xhqzw90suU9h40ADXarFapnK9bDdN+Rua/mzO2tU48czKUr+o1y5YUtM +zofJUVxpOApnjbuInYC29P9JRoC5BHqRVFS/G/yVEYNv8B6aT/Q3RQAmE2QhVQ9y +/EPI8pUo4MDWDRykE9owqasPkp2EpYaWjaIPzfMwR6gL3HOlU/4+creUxRaXEV3Y +1OuhasjCgHc5BmlGaICOJRx9QUJ9k2qScXNFEK0CggEBALYazhkQdCtLa/YV9wkH +yXwXL3E1NJ30IOGo5mjp+cU5neDr/oQ9CmNX8x6mrhwmpp0OHmp8HpMSZpa7HLbG +XlN3hqNmcgrmQFiRwfBMYWA/vR0iv+tGpQdKUiBmLkXFqABgvUA5vImKY0QDbtmk +ZJySQApRjgZWkiQmmXiS0hE9UJIUzuT/INpPNb8rJ6tKAjRgeFCKtAAg43+PACem +VrlwuV+KlG+VjH9Wlyb5Si1SNwCB8UEssOxijMYfiC/C8fyAOCE7C6p4HUqRiH+/ +56BKOI1nDvgNcjP5MnwMLB0aAAOgA4fV9Kjrt/IeV08TOmp6HSwlKON9WraN9Thp +Gp8CggEBAIeGkjASPQsy41wK+9TFY2tPfDFee1pJ22JywGYasK1ZuZh/003bOYjs +cg4fpp0/1/yYe+Xgebb3xzHIUlauRMiNQUPFAQTTWlUnGyHVuTpxEVbLhcqY2FG/ +t5SPgmu1H31bdfpA4LoA2ewLFeGIjKQOTMX5aCgPyZaqW/BAG0BcPEntYlLJpGbG +zSPIw8qUL3n/Bm0zpI3SrcUQoe0qOVr6UdeGTNO0dCkhED53ZzvoeMjsBv2IGegC +OPGzJCiW8NYquIRXSu0N9MHPYYy9XJU8rwkdOPyzNMvw0duBedT9wY3cimAD3KtQ +MTfJlrjd23Xn+aEmf/4M35SFl7OFxts= +-----END PRIVATE KEY----- diff --git a/tests/utilities/insecure-test.pem b/tests/utilities/insecure-test.pem new file mode 100644 index 00000000..6a1469ba --- /dev/null +++ b/tests/utilities/insecure-test.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFFzCCAv8CFBtqKeSAcQf/bQBPZaROIpbzIQ7UMA0GCSqGSIb3DQEBCwUAMEgx +CzAJBgNVBAYTAlVTMREwDwYDVQQIDAhDb2xvcmFkbzEPMA0GA1UEBwwGRGVudmVy +MRUwEwYDVQQKDAxjbG91ZHBhdGhsaWIwHhcNMjQwOTEzMTExNzQzWhcNMzMxMTMw 
+MTExNzQzWjBIMQswCQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNV +BAcMBkRlbnZlcjEVMBMGA1UECgwMY2xvdWRwYXRobGliMIICIjANBgkqhkiG9w0B +AQEFAAOCAg8AMIICCgKCAgEArk+8wpI/jpJ/7yQQURB1syRyv06mHWnM8rrIhHBY +krYJFN24spiPN6K/JLeuiop/ZErwIjf9o9F0wQ0AN0CVS9GR8djLVXIl7L72S5n8 +Nl9yYxdxSQ1+J7O/Y+W0ZIvR4ZiVYIIfawBlznUtmsHB+eZrU0BeKoN7TIPfqLMz +EgqCgZD4Ohq3E2FBVimsM6LhVFbuq3DpXZj/OoEVdy2MFV4vISbhzQwt1RAj8W3D +WZy20+YVaB+Qa44R4N0vscVNtGPo+nMgLiaHU5IM42a/jnldJCaJrh6CsNJukisE +4fwCXd+6QPxH4HC/S2FaNV7MVRALGLKd8BKrouFv+SpOn3ddxbDgNVpU+jHEZQOo +OB8+rhcbGspBHdqH3hz/uPKHPiKHq13kot/iG8TErvuLeXrV2Cpb/zq6PuCAkSRZ +wABj9JWdmd5lIQKUzzY/9jIZq9NPPDqJuLWfbNpGdGO7MSqILoL3ZWhiVYUXhLnS +j+AeEIdhZvvvkgi3wIBiqqvIEes3Sva+8lVTqU6YdOyd9tW/5fMaA2ugE/02w/WM +MkDThcYwKroNPwHlXkY7qRWfftSpvkRHpFMFPGVweHqiq3N2geVXEyN0vCEc8C6D +8NNmfhrVv3NPIrVipLM0gzx5X9ySbJIblgxklCi8wd3thXMHiGOr2RW6nFpt8+GM +gfECAwEAATANBgkqhkiG9w0BAQsFAAOCAgEAVIRLRR5bitzThcTsmSCPAzqbVCf1 +HSsTWGnISwiI3GD+2d+TykY+g9fw2eKbXzbfHu9VHFAMdpHfQc7Ud3d+tM45LnCo +cnvdXrpQg2EEdZaFJ76SmFMFoAnMd9LkuSdzt0P28nOlXVn/KDFp2ea8ROUUaM55 +oGjo6Cj7i9h5fEnuAEE2Gcepjp9DRjJRIuwAxcihEcQSxzv4mOHqwMuCk6dpOG5S +MgVoCMiWz/9vn9U+Vyn5cjTzLgbmEQPVm5BL57QfPUhFW8cAMR5NeIeizLSpiBZQ ++RvzW/S2T+s8Cc0GgUjgiAmOLRCVMLTJ+jv1KvWFzu762POqXpreTD9UGLHnUvxI +RbhEgxj8p4169CeJSa0A19U6pFWFsZU2MLJkjHTIGlpzk5Vg5qzMyybcbk9wQQZ/ +CMOg5pVaCZHyTUwrFxKF51oIv9a/tuQSe/ryj8GIj7t0mq0+7klvEn1a6wrkSr73 +FzMNaEm4eLRVWYbHj8m4314vvaDjtUXCcMDRLb8j3fjyrcPPTkbO99rt1jVfU5wS +Ji7tVksGrTIHHlWkqZdbPhfZyTBIG34FjtjSClNVsOBeX+VqUuku8uQaM/9iVNZS +QamZuURGQ1x5+XHMjUQpoqAII+zXegJ1RiVfequYcF7F0bermVVVGdb/Ly2yNH1F +O5/LKKZ32+d5sm4= +-----END CERTIFICATE----- From af32f2d320090abc521dce5e50703ab80b9552a4 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Tue, 17 Sep 2024 17:53:18 -0400 Subject: [PATCH 04/26] Change dir detection to / --- cloudpathlib/cloudpath.py | 2 +- cloudpathlib/http/httpclient.py | 9 ++++++++- cloudpathlib/http/httppath.py | 21 +++++++++++++++------ tests/test_cloudpath_file_io.py | 12 ++++++++---- 
tests/test_cloudpath_instantiation.py | 2 +- tests/test_cloudpath_upload_copy.py | 20 ++++++++++---------- tests/test_s3_specific.py | 2 +- 7 files changed, 44 insertions(+), 24 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 1e93e89d..7a8f88a6 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -1223,7 +1223,7 @@ def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None): ) elif subpath.is_dir(): subpath.copytree( - destination / subpath.name, + destination / (subpath.name + ("" if subpath.name.endswith("/") else "/")), force_overwrite_to_cloud=force_overwrite_to_cloud, ignore=ignore, ) diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index 1cb7dc2c..3f7e1ea4 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -27,6 +27,7 @@ def __init__( content_type_method: Optional[Callable] = mimetypes.guess_type, auth: Optional[urllib.request.BaseHandler] = None, custom_list_page_parser: Optional[Callable[[str], Iterable[str]]] = None, + custom_dir_matcher: Optional[Callable[[str], bool]] = None, ): super().__init__(file_cache_mode, local_cache_dir, content_type_method) self.auth = auth @@ -37,6 +38,12 @@ def __init__( self.opener = urllib.request.build_opener(self.auth) self.custom_list_page_parser = custom_list_page_parser + + self.dir_matcher = ( + custom_dir_matcher + if custom_dir_matcher is not None else + lambda x: x.endswith("/") + ) def _get_metadata(self, cloud_path: HttpPath) -> dict: with self.opener.open(cloud_path.as_url()) as response: @@ -147,7 +154,7 @@ def _simple_links(html: str) -> Iterable[str]: ) yield from ( - (self.CloudPath((urllib.parse.urljoin(base_url, match))), Path(match).suffix == "") + (self.CloudPath((urllib.parse.urljoin(base_url, match))), self.dir_matcher(match)) for match in parser(response) ) diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index e8953247..26021007 100644 
--- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -1,5 +1,5 @@ from pathlib import PurePosixPath -from typing import Tuple, Union, Optional +from typing import Any, Tuple, Union, Optional import os from pathlib import Path @@ -31,6 +31,17 @@ def __init__( else PurePosixPath(f"/{self._url.path}") ) + def _dispatch_to_path(self, func: str, *args, **kwargs) -> Any: + sup = super()._dispatch_to_path(func, *args, **kwargs) + + # some dispatch methods like "__truediv__" strip trailing slashes; + # for http paths, we need to keep them to indicate directories + if func == "__truediv__" and str(args[0]).endswith("/"): + return self._new_cloudpath(str(sup) + "/") + + else: + return sup + @property def drive(self) -> str: # For HTTP paths, no drive; use .anchor for scheme + netloc @@ -49,16 +60,14 @@ def is_dir(self) -> bool: if not self.exists(): return False - # HTTP doesn't really have directories, but some servers might list files if treated as such - # Here we'll assume paths without are dirs - return self._path.suffix == "" + # Use client default to iden + return self.client.dir_matcher(str(self)) def is_file(self) -> bool: if not self.exists(): return False - # HTTP doesn't have a direct file check, but we assume if it has a suffix, it's a file - return self._path.suffix != "" + return not self.client.dir_matcher(str(self)) def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: pass # no-op for HTTP Paths diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index 5da8c3fe..ea636574 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -98,12 +98,12 @@ def _make_glob_directory(root): (root / "dirC" / "fileC.txt").write_text("fileC") (root / "fileA.txt").write_text("fileA") - cloud_root = rig.create_cloud_path("glob-tests") + cloud_root = rig.create_cloud_path("glob-tests/") cloud_root.mkdir() _make_glob_directory(cloud_root) - local_root = tmp_path / "glob-tests" + 
local_root = tmp_path / "glob-tests/" local_root.mkdir() _make_glob_directory(local_root) @@ -116,7 +116,7 @@ def _make_glob_directory(root): def _lstrip_path_root(path, root): rel_path = str(path)[len(str(root)) :] - return rel_path.rstrip("/") # agnostic to trailing slash + return rel_path.strip("/") def _assert_glob_results_match(cloud_results, local_results, cloud_root, local_root): @@ -342,6 +342,10 @@ def test_is_dir_is_file(rig, tmp_path): dir_nested_no_slash = rig.create_cloud_path("dir_1/dir_1_0") for test_case in [dir_slash, dir_no_slash, dir_nested_slash, dir_nested_no_slash]: + # skip no-slash cases, which are interpreted as files for http paths + if not str(test_case).endswith("/") and rig.path_class in [HttpPath]: + continue + assert test_case.is_dir() assert not test_case.is_file() @@ -472,7 +476,7 @@ def test_cloud_path_download_to(rig, tmp_path): def test_fspath(rig): - p = rig.create_cloud_path("dir_0") + p = rig.create_cloud_path("dir_0/") assert os.fspath(p) == p.fspath diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index 15faf25f..4a98a549 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -114,7 +114,7 @@ def test_dependencies_not_loaded(rig, monkeypatch): def test_is_pathlike(rig): - p = rig.create_cloud_path("dir_0") + p = rig.create_cloud_path("dir_0/") assert isinstance(p, os.PathLike) diff --git a/tests/test_cloudpath_upload_copy.py b/tests/test_cloudpath_upload_copy.py index 6e447f49..e913d23f 100644 --- a/tests/test_cloudpath_upload_copy.py +++ b/tests/test_cloudpath_upload_copy.py @@ -77,7 +77,7 @@ def test_upload_from_file(rig, upload_assets_dir): assert p.read_text() == "Hello from 1" # to dir, dir exists - p = rig.create_cloud_path("dir_0") # created by fixtures + p = rig.create_cloud_path("dir_0/") # created by fixtures assert p.exists() p.upload_from(upload_assets_dir / "upload_1.txt") assert (p / "upload_1.txt").exists() @@ -93,7 +93,7 @@ 
def test_upload_from_dir(rig, upload_assets_dir): assert assert_mirrored(p, upload_assets_dir) # to dir, dir exists - p2 = rig.create_cloud_path("dir_0") # created by fixtures + p2 = rig.create_cloud_path("dir_0/") # created by fixtures assert p2.exists() p2.upload_from(upload_assets_dir) @@ -149,7 +149,7 @@ def test_copy(rig, upload_assets_dir, tmpdir): assert p_new.read_text() == "Hello from 1" # cloud to cloud directory - cloud_dest = rig.create_cloud_path("dir_1") # created by fixtures + cloud_dest = rig.create_cloud_path("dir_1/") # created by fixtures p_new = p.copy(cloud_dest) assert str(p_new) == str(p_new.parent / p.name) # file created assert p_new.exists() @@ -196,7 +196,7 @@ def test_copy(rig, upload_assets_dir, tmpdir): (other_dir / p2.name).unlink() # cloud dir raises - cloud_dir = rig.create_cloud_path("dir_1") # created by fixtures + cloud_dir = rig.create_cloud_path("dir_1/") # created by fixtures with pytest.raises(ValueError) as e: p_new = cloud_dir.copy(Path(tmpdir.mkdir("test_copy_dir_fails"))) assert "use the method copytree" in str(e) @@ -210,12 +210,12 @@ def test_copytree(rig, tmpdir): p.copytree(local_out) with pytest.raises(CloudPathFileExistsError): - p = rig.create_cloud_path("dir_0") + p = rig.create_cloud_path("dir_0/") p_out = rig.create_cloud_path("dir_0/file0_0.txt") p.copytree(p_out) # cloud dir to local dir that exists - p = rig.create_cloud_path("dir_1") + p = rig.create_cloud_path("dir_1/") local_out = Path(tmpdir.mkdir("copytree_from_cloud")) p.copytree(local_out) assert assert_mirrored(p, local_out) @@ -231,12 +231,12 @@ def test_copytree(rig, tmpdir): assert assert_mirrored(p, local_out) # cloud dir to cloud dir that does not exist - p2 = rig.create_cloud_path("new_dir") + p2 = rig.create_cloud_path("new_dir/") p.copytree(p2) assert assert_mirrored(p2, p) # cloud dir to cloud dir that exists - p2 = rig.create_cloud_path("new_dir2") + p2 = rig.create_cloud_path("new_dir2/") (p2 / "existing_file.txt").write_text("asdf") # 
ensures p2 exists p.copytree(p2) assert assert_mirrored(p2, p, check_no_extra=False) @@ -254,7 +254,7 @@ def test_copytree(rig, tmpdir): (p / "dir2" / "file2.txt").write_text("ignore") # cloud dir to local dir but ignoring files (shutil.ignore_patterns) - p3 = rig.create_cloud_path("new_dir3") + p3 = rig.create_cloud_path("new_dir3/") p.copytree(p3, ignore=ignore_patterns("*.py", "dir*")) assert assert_mirrored(p, p3, check_no_extra=False) assert not (p3 / "ignored.py").exists() @@ -262,7 +262,7 @@ def test_copytree(rig, tmpdir): assert not (p3 / "dir2").exists() # cloud dir to local dir but ignoring files (custom function) - p4 = rig.create_cloud_path("new_dir4") + p4 = rig.create_cloud_path("new_dir4/") def _custom_ignore(path, names): ignore = [] diff --git a/tests/test_s3_specific.py b/tests/test_s3_specific.py index 4b12f7b9..45d2bfea 100644 --- a/tests/test_s3_specific.py +++ b/tests/test_s3_specific.py @@ -176,7 +176,7 @@ def test_directories(s3_like_rig): assert super_path.exists() assert not super_path.is_dir() - super_path = s3_like_rig.create_cloud_path("dir_0") + super_path = s3_like_rig.create_cloud_path("dir_0/") assert super_path.exists() assert super_path.is_dir() From 12b685aa1ac579fd461e78200f065db9adc9c584 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Tue, 17 Sep 2024 17:53:47 -0400 Subject: [PATCH 05/26] lint --- cloudpathlib/cloudpath.py | 2 +- cloudpathlib/http/httpclient.py | 6 ++---- cloudpathlib/http/httppath.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 7a8f88a6..b5cade32 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -1223,7 +1223,7 @@ def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None): ) elif subpath.is_dir(): subpath.copytree( - destination / (subpath.name + ("" if subpath.name.endswith("/") else "/")), + destination / (subpath.name + ("" if subpath.name.endswith("/") else "/")), 
force_overwrite_to_cloud=force_overwrite_to_cloud, ignore=ignore, ) diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index 3f7e1ea4..b9febac7 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -38,11 +38,9 @@ def __init__( self.opener = urllib.request.build_opener(self.auth) self.custom_list_page_parser = custom_list_page_parser - + self.dir_matcher = ( - custom_dir_matcher - if custom_dir_matcher is not None else - lambda x: x.endswith("/") + custom_dir_matcher if custom_dir_matcher is not None else lambda x: x.endswith("/") ) def _get_metadata(self, cloud_path: HttpPath) -> dict: diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index 26021007..96e16033 100644 --- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -38,7 +38,7 @@ def _dispatch_to_path(self, func: str, *args, **kwargs) -> Any: # for http paths, we need to keep them to indicate directories if func == "__truediv__" and str(args[0]).endswith("/"): return self._new_cloudpath(str(sup) + "/") - + else: return sup From bf97bafde1d2f13484873bb73441ab6bd1ec0558 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 14 Feb 2025 12:30:22 -0800 Subject: [PATCH 06/26] Update https tests --- tests/http_fixtures.py | 24 +++++++++ tests/test_client.py | 4 +- tests/test_cloudpath_file_io.py | 14 ++--- tests/test_cloudpath_instantiation.py | 6 +-- tests/test_cloudpath_manipulation.py | 8 +-- tests/test_cloudpath_upload_copy.py | 4 +- tests/utilities/insecure-test.crt | 19 +++++++ tests/utilities/insecure-test.csr | 27 ---------- tests/utilities/insecure-test.key | 76 +++++++++------------------ tests/utilities/insecure-test.pem | 45 ++++++---------- 10 files changed, 104 insertions(+), 123 deletions(-) create mode 100644 tests/utilities/insecure-test.crt delete mode 100644 tests/utilities/insecure-test.csr diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index 754d4a65..c3e7b3f9 100644 --- 
a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -125,6 +125,14 @@ def https_server(tmp_path_factory, worker_id): server_dir = tmp_path_factory.mktemp("server_files").resolve() + # Command for generating self-signed localhost cert + # openssl req -x509 -out localhost.crt -keyout localhost.key \ + # -newkey rsa:2048 -nodes -sha256 \ + # -subj '/CN=localhost' -extensions EXT -config <( \ + # printf "[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\nsubjectAltName=DNS:localhost\nkeyUsage=digitalSignature\nextendedKeyUsage=serverAuth") + # + # openssl x509 -in localhost.crt -out localhost.pem -outform PEM + host, server_thread = _http_server( server_dir, port, @@ -133,8 +141,24 @@ def https_server(tmp_path_factory, worker_id): keyfile=utilities_dir / "insecure-test.key", ) + # Add this self-signed cert at the library level so it is used in tests + _original_create_context = ssl._create_default_https_context + + def _create_context_with_self_signed_cert(*args, **kwargs): + context = _original_create_context(*args, **kwargs) + context.load_cert_chain( + certfile=utilities_dir / "insecure-test.pem", + keyfile=utilities_dir / "insecure-test.key", + ) + context.load_verify_locations(cafile=utilities_dir / "insecure-test.pem") + return context + + ssl._create_default_https_context = _create_context_with_self_signed_cert + yield host, server_dir + ssl._create_default_https_context = _original_create_context + server_thread.join(0) if server_dir.exists(): diff --git a/tests/test_client.py b/tests/test_client.py index 78505696..3eceafc8 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -9,7 +9,7 @@ from cloudpathlib import CloudPath from cloudpathlib.client import register_client_class from cloudpathlib.cloudpath import implementation_registry, register_path_class -from cloudpathlib.http.httpclient import HttpClient +from cloudpathlib.http.httpclient import HttpClient, HttpsClient from cloudpathlib.s3.s3client import S3Client from 
cloudpathlib.s3.s3path import S3Path @@ -97,7 +97,7 @@ def _test_write_content_type(suffix, expected, rig_ref, check=True): for suffix, content_type in mimes: _test_write_content_type(suffix, content_type, rig, check=False) - if rig.client_class is HttpClient: + if rig.client_class in [HttpClient, HttpsClient]: # HTTP client doesn't support custom content types return diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index ea636574..efda2156 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -14,8 +14,8 @@ CloudPathNotImplementedError, DirectoryNotEmptyError, ) -from cloudpathlib.http.httpclient import HttpClient -from cloudpathlib.http.httppath import HttpPath +from cloudpathlib.http.httpclient import HttpClient, HttpsClient +from cloudpathlib.http.httppath import HttpPath, HttpsPath def test_file_discovery(rig): @@ -27,7 +27,7 @@ def test_file_discovery(rig): p2.touch() assert p2.exists() - if rig.client_class not in [HttpClient]: # not supported to touch existing + if rig.client_class not in [HttpClient, HttpsClient]: # not supported to touch existing p2.touch(exist_ok=True) else: with pytest.raises(NotImplementedError): @@ -189,7 +189,7 @@ def test_walk(glob_test_dirs): def test_list_buckets(rig): - if rig.path_class in [HttpPath]: + if rig.path_class in [HttpPath, HttpsPath]: return # no bucket listing for HTTP # test we can list buckets @@ -343,7 +343,7 @@ def test_is_dir_is_file(rig, tmp_path): for test_case in [dir_slash, dir_no_slash, dir_nested_slash, dir_nested_no_slash]: # skip no-slash cases, which are interpreted as files for http paths - if not str(test_case).endswith("/") and rig.path_class in [HttpPath]: + if not str(test_case).endswith("/") and rig.path_class in [HttpPath, HttpsPath]: continue assert test_case.is_dir() @@ -383,7 +383,7 @@ def test_file_read_writes(rig, tmp_path): before_touch = datetime.now() sleep(1) - if rig.path_class not in [HttpPath]: # not supported to touch 
existing + if rig.path_class not in [HttpPath, HttpsPath]: # not supported to touch existing p.touch() if not getattr(rig, "is_custom_s3", False): @@ -394,7 +394,7 @@ def test_file_read_writes(rig, tmp_path): if not getattr(rig, "is_adls_gen2", False): p.mkdir() - if rig.path_class not in [HttpPath]: # not supported to touch existing + if rig.path_class not in [HttpPath, HttpsPath]: # not supported to touch existing assert p.etag is not None dest = rig.create_cloud_path("dir2/new_file0_0.txt") diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index 4a98a549..4f7cdf5d 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -7,7 +7,7 @@ from cloudpathlib import AzureBlobPath, CloudPath, GSPath, S3Path from cloudpathlib.exceptions import InvalidPrefixError, MissingDependenciesError -from cloudpathlib.http.httppath import HttpPath +from cloudpathlib.http.httppath import HttpPath, HttpsPath @pytest.mark.parametrize( @@ -46,7 +46,7 @@ def test_dispatch_error(): @pytest.mark.parametrize("path", ["b/k", "b/k", "b/k.file", "b/k", "b"]) def test_instantiation(rig, path): - if rig.path_class in [HttpPath]: + if rig.path_class in [HttpPath, HttpsPath]: path = "example-url.com/" + path # check two cases of prefix @@ -56,7 +56,7 @@ def test_instantiation(rig, path): assert repr(p) == f"{rig.path_class.__name__}('{expected}')" assert str(p) == expected - if rig.path_class in [HttpPath]: + if rig.path_class in [HttpPath, HttpsPath]: assert p._no_prefix == path.replace("example-url.com/", "") assert str(p._path) == path.replace("example-url.com", "") diff --git a/tests/test_cloudpath_manipulation.py b/tests/test_cloudpath_manipulation.py index 94d38191..553dca49 100644 --- a/tests/test_cloudpath_manipulation.py +++ b/tests/test_cloudpath_manipulation.py @@ -5,7 +5,7 @@ import pytest from cloudpathlib import CloudPath -from cloudpathlib.http.httppath import HttpPath +from cloudpathlib.http.httppath 
import HttpPath, HttpsPath def test_properties(rig): @@ -85,12 +85,12 @@ def test_joins(rig): if sys.version_info >= (3, 12): assert rig.create_cloud_path("a/b/c/d").match("A/*/C/D", case_sensitive=False) - if rig.path_class not in [HttpPath]: + if rig.path_class not in [HttpPath, HttpsPath]: assert rig.create_cloud_path("a/b/c/d").anchor == rig.cloud_prefix assert rig.create_cloud_path("a/b/c/d").parent == rig.create_cloud_path("a/b/c") - if rig.path_class not in [HttpPath]: + if rig.path_class not in [HttpPath, HttpsPath]: assert rig.create_cloud_path("a/b/c/d").parents == ( rig.create_cloud_path("a/b/c"), rig.create_cloud_path("a/b"), @@ -119,7 +119,7 @@ def test_joins(rig): == f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}/a/b/c" ) - if rig.path_class in [HttpPath]: + if rig.path_class in [HttpPath, HttpsPath]: assert rig.create_cloud_path("a/b/c/d").parts == ( rig.cloud_prefix + rig.drive + "/", rig.test_dir, diff --git a/tests/test_cloudpath_upload_copy.py b/tests/test_cloudpath_upload_copy.py index e913d23f..06710590 100644 --- a/tests/test_cloudpath_upload_copy.py +++ b/tests/test_cloudpath_upload_copy.py @@ -4,7 +4,7 @@ import pytest -from cloudpathlib.http.httppath import HttpPath +from cloudpathlib.http.httppath import HttpPath, HttpsPath from cloudpathlib.local import LocalGSPath, LocalS3Path, LocalS3Client from cloudpathlib.exceptions import ( CloudPathFileExistsError, @@ -137,7 +137,7 @@ def test_copy(rig, upload_assets_dir, tmpdir): p_new = p.copy(p.parent / "new_upload_1.txt") assert p_new.exists() - if rig.path_class not in [HttpPath]: + if rig.path_class not in [HttpPath, HttpsPath]: assert not p_new._local.exists() # cache should never have been downloaded assert not p._local.exists() # cache should never have been downloaded assert p_new.read_text() == "Hello from 1" diff --git a/tests/utilities/insecure-test.crt b/tests/utilities/insecure-test.crt new file mode 100644 index 00000000..d67dbdcc --- /dev/null +++ 
b/tests/utilities/insecure-test.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDDzCCAfegAwIBAgIUBUa66W9WhdTxm3BiIlfjGN4HjLkwDQYJKoZIhvcNAQEL +BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTI1MDIwMzAyMzIwOFoXDTI1MDMw +NTAyMzIwOFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF +AAOCAQ8AMIIBCgKCAQEAslbXVimtI9/ndG2vPKt7VXKF6cWegxH9Jkjn+2O1G8ZQ +8cQKLlS7zfqLN7VXVOIVtun1/ZK/saW7Ce2usB21VPDKAWGLtAZiIb5lhT25sljU +u+OLlaSqMD4rMdYgr6rfNnszDQfZ/9J4Jt5WZU3GZj0UguXHfUg/fVprRYmM4zxy +9kGxuERErDz3mISkLKkA2Xgo+1Y4aCTTmLCZYF5F1DitC39dB6efd98Q+Gz7eI1R +fzlGYaEq4ISrIy1h8UnLQmVoNfxTSbuZJp9kmTHqefynxWQQDlfLJ4yeLB4rOMlQ +ZOE/829Pc0nFZSLoeha2f9S6CQ1nFIhGNwQuJYOhTwIDAQABo1kwVzAUBgNVHREE +DTALgglsb2NhbGhvc3QwCwYDVR0PBAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMB +MB0GA1UdDgQWBBTD07Fp8N4xue7MMVrlI77W8Hpi1zANBgkqhkiG9w0BAQsFAAOC +AQEAjl4O2sDgEoe26OaxA8ICwemoaQh/uobVSwWejtNgrId8V8g8HQFNt3M3qbPk +mi8ymwmbs9NuiDK8DpWrIiXIT/22DUFJ/mfNlRB5FBK0ACwWfLs+lDrqD+Xdz/FP +ZT07xFgByrSXkBz3Av1Zrm8PzjLAuA7Ar/XYnkdwwmlNujFx+W92gSkGUduPE51Y +qCO0pFoG6TVhMv6kIQqqefblDv32yiMB+Yki/xYiR/6kRfOim8oXbwsZZvI1/QI6 +76Pyw57+2ZO9hREZRRuhtz0Ku0BFOeFI2/K2D+RDGQZ8mEfO6z/OQ3D1lB+apYDa +1uhrG0/cbVccEQDqfjiliHJuEQ== +-----END CERTIFICATE----- diff --git a/tests/utilities/insecure-test.csr b/tests/utilities/insecure-test.csr deleted file mode 100644 index 01625734..00000000 --- a/tests/utilities/insecure-test.csr +++ /dev/null @@ -1,27 +0,0 @@ ------BEGIN CERTIFICATE REQUEST----- -MIIEqjCCApICAQAwSDELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8w -DQYDVQQHDAZEZW52ZXIxFTATBgNVBAoMDGNsb3VkcGF0aGxpYjCCAiIwDQYJKoZI -hvcNAQEBBQADggIPADCCAgoCggIBAK5PvMKSP46Sf+8kEFEQdbMkcr9Oph1pzPK6 -yIRwWJK2CRTduLKYjzeivyS3roqKf2RK8CI3/aPRdMENADdAlUvRkfHYy1VyJey+ -9kuZ/DZfcmMXcUkNfiezv2PltGSL0eGYlWCCH2sAZc51LZrBwfnma1NAXiqDe0yD -36izMxIKgoGQ+DoatxNhQVYprDOi4VRW7qtw6V2Y/zqBFXctjBVeLyEm4c0MLdUQ -I/Ftw1mcttPmFWgfkGuOEeDdL7HFTbRj6PpzIC4mh1OSDONmv455XSQmia4egrDS -bpIrBOH8Al3fukD8R+Bwv0thWjVezFUQCxiynfASq6Lhb/kqTp93XcWw4DVaVPox 
-xGUDqDgfPq4XGxrKQR3ah94c/7jyhz4ih6td5KLf4hvExK77i3l61dgqW/86uj7g -gJEkWcAAY/SVnZneZSEClM82P/YyGavTTzw6ibi1n2zaRnRjuzEqiC6C92VoYlWF -F4S50o/gHhCHYWb775IIt8CAYqqryBHrN0r2vvJVU6lOmHTsnfbVv+XzGgNroBP9 -NsP1jDJA04XGMCq6DT8B5V5GO6kVn37Uqb5ER6RTBTxlcHh6oqtzdoHlVxMjdLwh -HPAug/DTZn4a1b9zTyK1YqSzNIM8eV/ckmySG5YMZJQovMHd7YVzB4hjq9kVupxa -bfPhjIHxAgMBAAGgHTAbBgkqhkiG9w0BCQcxDgwMY2xvdWRwYXRobGliMA0GCSqG -SIb3DQEBCwUAA4ICAQBeTRNKjo+ol3zuKfteuKt7+mLWzL3/qtUSCmCwt6t+NebN -ebkOwZA4HW0uUt/rdht58CJAvFri+DnkEidt/ldcg/CQ/tpWALZFdNa2z0hb+qEL -Q7wHO1QkwHG8/Q7yrcBNGSDsp4l7cH+8FQBcAVJxn++ixTe4dIiyscUdNRkXywsT -/UdQlK3oULR7Zv9k3nDErXTow/6QazjxtUyrfyuFdSDTAKJaKCOLt5NcJif/Ev3G -rUMJQElNz3W0P73ci+ueuihYdaveDx1vptO9VCBnwFOyTgjCYPS9g3MB8KIh5cJz -sj2J5J5tEUsyAa8ky4hvoLyP7GE29XvPA8pH1rOtQ++lmMzpP1vkPEGe0ezXrw2y -h4LBJXeMCg3/r3otEHnppI5PRTX3m1WlHyInpFIjets6VLDKjwENyreDmO5hIfRd -4ZxjxYzG97Tekoa+v9Y9qf3YCCGvbswOwfyj8hNheoMKv2f+rG2MwSPWfYlML/oT -4UA/C3o9Y7oa7H9FdEiTuXRgLcKUZqZJ0JuVhSbdPAAYSdrQE/EF06jyU6ZENxUu -0UJRwaXLETIIii99TUxyTmJTrvWAEbo5hpwfA1P6aaCLtWj0Qm6WSD3uLjU56yaX -6Q2kdspxv1BiT2TC4RO/ZH/8OwmSfe0dSg8jEOQf2+B0DcTPD+yHjo2hZWpT0A== ------END CERTIFICATE REQUEST----- diff --git a/tests/utilities/insecure-test.key b/tests/utilities/insecure-test.key index 2c6f5248..86fd4766 100644 --- a/tests/utilities/insecure-test.key +++ b/tests/utilities/insecure-test.key @@ -1,52 +1,28 @@ -----BEGIN PRIVATE KEY----- -MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQCuT7zCkj+Okn/v -JBBREHWzJHK/TqYdaczyusiEcFiStgkU3biymI83or8kt66Kin9kSvAiN/2j0XTB -DQA3QJVL0ZHx2MtVciXsvvZLmfw2X3JjF3FJDX4ns79j5bRki9HhmJVggh9rAGXO -dS2awcH55mtTQF4qg3tMg9+oszMSCoKBkPg6GrcTYUFWKawzouFUVu6rcOldmP86 -gRV3LYwVXi8hJuHNDC3VECPxbcNZnLbT5hVoH5BrjhHg3S+xxU20Y+j6cyAuJodT -kgzjZr+OeV0kJomuHoKw0m6SKwTh/AJd37pA/EfgcL9LYVo1XsxVEAsYsp3wEqui -4W/5Kk6fd13FsOA1WlT6McRlA6g4Hz6uFxsaykEd2ofeHP+48oc+IoerXeSi3+Ib -xMSu+4t5etXYKlv/Oro+4ICRJFnAAGP0lZ2Z3mUhApTPNj/2Mhmr0088Oom4tZ9s -2kZ0Y7sxKogugvdlaGJVhReEudKP4B4Qh2Fm+++SCLfAgGKqq8gR6zdK9r7yVVOp 
-Tph07J321b/l8xoDa6AT/TbD9YwyQNOFxjAqug0/AeVeRjupFZ9+1Km+REekUwU8 -ZXB4eqKrc3aB5VcTI3S8IRzwLoPw02Z+GtW/c08itWKkszSDPHlf3JJskhuWDGSU -KLzB3e2FcweIY6vZFbqcWm3z4YyB8QIDAQABAoICAAvBHOdCSd7CTLYZY/kkl4D8 -sbkO+nOumUPx3F+MynaKzGKw5lczESpz7EaFp4pl9zdbE7yFRVchK/LeIzXNeSz8 -ecZXeCGP3A/XH+cgyTUdJr0GmuEf4XIpyzKN+8qoO+0KfhjKb9GBuABZdYKaSh2z -S2kLRMnCaip3FKgKjzbclm1auA8F3E50CWc7rXPYhXk5RqQxG6gUoVaNRR+BnbVy -T4kl+7gv9/09NsBrIcqTQ97pKWf03zl7y3D8DfODkVhbQLAttfa/4V/Y0BRkuAEk -wYumvVh6IvGQRNxjK0F6y8U0EmNSLYt+yAQgyENIXEzobozXmFtU1dX/fZxNix7n -9fRXFBjOHVJNyW2nYgdVPeENbG+3u68baVsYG8sjsbk6XJyh9SMozEPaOCIQGWcr -pFz9yZb2rCZKvqlz09Qnhx1TKblMnUkC1VmVXLZOgylhJY12aueibNpaPw6LHPu1 -8JUnN0e2PIUjl4wWn6GPmkN+PSMm6khUTwYZx199fC9QFuxkij1qG5iQwvvsuMIH -gxvjO3XP2RAR01UNxhPPG+PgM6g3TBCfRd2B21toKgKNC9kzwsVLg251czxeTVh1 -2/uK0h06MkqHl11fJvBrWKLUhsnpgNqMSGusDIvf9vA39LvJSVxAcE550/dhdbY9 -VSjPnS5jcsK7JA4RgJ3rAoIBAQD09k5m8H+pky3+RMP0BP0apsJk5feLFl++FQc0 -otLgPzKSBWPqdhY/2R/UJKBQEc28CkWrtoe5wr51gM3t/ytU4xltYY+o6MyaAChD -rtwhm62Uu0X/CA1G9FTmjQJkCmNybwHzaqoHZ4kEax3WVGx0FC6Zxp2rl/wIDYuJ -z1tls+MMsVAoeoDCoxpRzSxWqY4xeEROuJoEOPdesPCkUqqCga1rT6+I8IUA7lmb -wjrOD7RB3RyEuM5oxfIJBuXZKlgHGjF1M0eCo9xjQFZPCG2lkoNn5UJofEz8Ktbv -Cazx6YvHSMYuowEsonbuz2C3er2ydyCNIuE+n1oLGBz9RmKjAoIBAQC2KnWvhfM4 -sz31lxKDg5xPszU7wozItTWzMXsg6hXi/wIFtFc7Y23IY8al5WiUaO9fV42nOUDB -gNk684lsKPR144XE5jxUSzVqM9DCLj931fHpuAkmxr6bkhxnDMK37QQ3YUib68ca -nBucqozaoS15sdgzTc25xNWgPuLHxq3wVBi1bELbSgLrrWVHr8hB3xTLF1WbCLxC -RlNlSc7EnJ841xx1mZmTwxsWG+bHfs6NjgD4zVqbjLSj5Orv8f0pD4AE8pyISlr+ -+rJTT6iaHQvCKMYv4Ynfa74YA168BBR+9IcstrIkdno25uHOXDb97V32ab5S3yFW -YlRE0lEHA+ZbAoIBADrPX2dLWfrmQOaIr9rHz9Q0MPt0Uofr6TSDa+VxKdt4kLWJ -4cEKdLEDeaa+3FYc0B3TAYMGIXOxk3Q2Zjo7RrXsh9BA2fKdYXGflAsb0fGnpHbO -tzFRR46/Xhqzw90suU9h40ADXarFapnK9bDdN+Rua/mzO2tU48czKUr+o1y5YUtM -zofJUVxpOApnjbuInYC29P9JRoC5BHqRVFS/G/yVEYNv8B6aT/Q3RQAmE2QhVQ9y -/EPI8pUo4MDWDRykE9owqasPkp2EpYaWjaIPzfMwR6gL3HOlU/4+creUxRaXEV3Y -1OuhasjCgHc5BmlGaICOJRx9QUJ9k2qScXNFEK0CggEBALYazhkQdCtLa/YV9wkH 
-yXwXL3E1NJ30IOGo5mjp+cU5neDr/oQ9CmNX8x6mrhwmpp0OHmp8HpMSZpa7HLbG -XlN3hqNmcgrmQFiRwfBMYWA/vR0iv+tGpQdKUiBmLkXFqABgvUA5vImKY0QDbtmk -ZJySQApRjgZWkiQmmXiS0hE9UJIUzuT/INpPNb8rJ6tKAjRgeFCKtAAg43+PACem -VrlwuV+KlG+VjH9Wlyb5Si1SNwCB8UEssOxijMYfiC/C8fyAOCE7C6p4HUqRiH+/ -56BKOI1nDvgNcjP5MnwMLB0aAAOgA4fV9Kjrt/IeV08TOmp6HSwlKON9WraN9Thp -Gp8CggEBAIeGkjASPQsy41wK+9TFY2tPfDFee1pJ22JywGYasK1ZuZh/003bOYjs -cg4fpp0/1/yYe+Xgebb3xzHIUlauRMiNQUPFAQTTWlUnGyHVuTpxEVbLhcqY2FG/ -t5SPgmu1H31bdfpA4LoA2ewLFeGIjKQOTMX5aCgPyZaqW/BAG0BcPEntYlLJpGbG -zSPIw8qUL3n/Bm0zpI3SrcUQoe0qOVr6UdeGTNO0dCkhED53ZzvoeMjsBv2IGegC -OPGzJCiW8NYquIRXSu0N9MHPYYy9XJU8rwkdOPyzNMvw0duBedT9wY3cimAD3KtQ -MTfJlrjd23Xn+aEmf/4M35SFl7OFxts= +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCyVtdWKa0j3+d0 +ba88q3tVcoXpxZ6DEf0mSOf7Y7UbxlDxxAouVLvN+os3tVdU4hW26fX9kr+xpbsJ +7a6wHbVU8MoBYYu0BmIhvmWFPbmyWNS744uVpKowPisx1iCvqt82ezMNB9n/0ngm +3lZlTcZmPRSC5cd9SD99WmtFiYzjPHL2QbG4RESsPPeYhKQsqQDZeCj7VjhoJNOY +sJlgXkXUOK0Lf10Hp5933xD4bPt4jVF/OUZhoSrghKsjLWHxSctCZWg1/FNJu5km +n2SZMep5/KfFZBAOV8snjJ4sHis4yVBk4T/zb09zScVlIuh6FrZ/1LoJDWcUiEY3 +BC4lg6FPAgMBAAECggEAF++H30ygrFv02K+QAXiSiRlh18pqR/U2INlQKXFscVng +q1JAe49r1W13GfcAzae6el5UYcA8cj9oJyD7fS+/krLuqdjJw1PH//dp8MQkFEfP +5ZfYuDDtlqQBcCLpKCqq4skORQV0wN0iuzTEK7yQkFBKXHcymyIDQ3PLSf34JtdQ +NlPrgg4k22h6YpFxZqyKkjGfVLZ3Xy34zhwoMO+9c+QpLWpUmK5h942tRbmJcL6s +TEKbBBX7lNrkcruMSW4Pa3hxtaVIkgCMzl3C5ESbGnKwyZoSXYSYLz7ZJ31t419D +n8pbd5QnT5I6kHNS1zjfDulK/atjA0MRfBzo69WToQKBgQD2YmLRzZ7+wEhR51/f +bjtegd91cBE/oc5QI1/peOzqCbE1XK3AXkl7MdnHgmvDnB/am9in5er6dP+zEk70 +zkXi8d4CCGr3rOTSNL9J/jYRT6qzrJ3W93LZyF8SLkdd+dlArm6WfmpZAd7NDjPY +rPaFWRmmZw2nelGtJ7VsF4JrmwKBgQC5TJ6s/C0ptCc8eqpqZrXGs3GKf1e2XpJa +IMTNypLRJlW5/pQV4UcDnttaW9h/KHG90SHGc8QrNw9ZsSyv0KP0nDQggAUgugbk +UDbqACoVfyK16/y09/UxDmm4+gneVfRl43GE9l6E8eWcaLDMq9bIf8d6V6fzVKbr +CncCD+k+XQKBgBcC/2nLA6OVf+uuaYTrvb3I8X79dY705NkVRLsAoVgGibeLLWs/ +IyvPbyC0LK57YXpAfg7IaZb04IDwH9G7hkl4/5w+/wac7k43wPn/NTuOrknTwHY4 
+bFCs5zxCeEXttP3WjNufeP1RMGcEgZEmLmwr8zpYe6z9vq0mfUpYM8UrAoGADNyh +ovY6GEv18T++dnY8pdcsyWU/bFwyImll37bw6+2SQ7/E/aPObxTmuuXnKCRhiMBs +bL76XdmLijk/FdEQoFj7UUoxkOz4r35V4XYzdZyGh0QjHVicv1P1yK4qYzFqLmxU +I9uKAgZcXrfxlbNTzET6Tqao9L3qfCQU0KLUGM0CgYEAigDUTS/X8e+pzGAaHKrf +o5GrMEAb9yxYb9R+LVunTUKCil6fj6XeF05JeN+VwoY4KdBVRi0JF3zGPFtUuvkv +o1fsiY5jzQh3Q6nQRju1mthOsEQeii8n9xO4VxIPHxFFZpfTtIkgh1aC9GMkLyld +OsW4jP0Sd5izcqEuBtcZv4g= -----END PRIVATE KEY----- diff --git a/tests/utilities/insecure-test.pem b/tests/utilities/insecure-test.pem index 6a1469ba..d67dbdcc 100644 --- a/tests/utilities/insecure-test.pem +++ b/tests/utilities/insecure-test.pem @@ -1,30 +1,19 @@ -----BEGIN CERTIFICATE----- -MIIFFzCCAv8CFBtqKeSAcQf/bQBPZaROIpbzIQ7UMA0GCSqGSIb3DQEBCwUAMEgx -CzAJBgNVBAYTAlVTMREwDwYDVQQIDAhDb2xvcmFkbzEPMA0GA1UEBwwGRGVudmVy -MRUwEwYDVQQKDAxjbG91ZHBhdGhsaWIwHhcNMjQwOTEzMTExNzQzWhcNMzMxMTMw -MTExNzQzWjBIMQswCQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNV -BAcMBkRlbnZlcjEVMBMGA1UECgwMY2xvdWRwYXRobGliMIICIjANBgkqhkiG9w0B -AQEFAAOCAg8AMIICCgKCAgEArk+8wpI/jpJ/7yQQURB1syRyv06mHWnM8rrIhHBY -krYJFN24spiPN6K/JLeuiop/ZErwIjf9o9F0wQ0AN0CVS9GR8djLVXIl7L72S5n8 -Nl9yYxdxSQ1+J7O/Y+W0ZIvR4ZiVYIIfawBlznUtmsHB+eZrU0BeKoN7TIPfqLMz -EgqCgZD4Ohq3E2FBVimsM6LhVFbuq3DpXZj/OoEVdy2MFV4vISbhzQwt1RAj8W3D -WZy20+YVaB+Qa44R4N0vscVNtGPo+nMgLiaHU5IM42a/jnldJCaJrh6CsNJukisE -4fwCXd+6QPxH4HC/S2FaNV7MVRALGLKd8BKrouFv+SpOn3ddxbDgNVpU+jHEZQOo -OB8+rhcbGspBHdqH3hz/uPKHPiKHq13kot/iG8TErvuLeXrV2Cpb/zq6PuCAkSRZ -wABj9JWdmd5lIQKUzzY/9jIZq9NPPDqJuLWfbNpGdGO7MSqILoL3ZWhiVYUXhLnS -j+AeEIdhZvvvkgi3wIBiqqvIEes3Sva+8lVTqU6YdOyd9tW/5fMaA2ugE/02w/WM -MkDThcYwKroNPwHlXkY7qRWfftSpvkRHpFMFPGVweHqiq3N2geVXEyN0vCEc8C6D -8NNmfhrVv3NPIrVipLM0gzx5X9ySbJIblgxklCi8wd3thXMHiGOr2RW6nFpt8+GM -gfECAwEAATANBgkqhkiG9w0BAQsFAAOCAgEAVIRLRR5bitzThcTsmSCPAzqbVCf1 -HSsTWGnISwiI3GD+2d+TykY+g9fw2eKbXzbfHu9VHFAMdpHfQc7Ud3d+tM45LnCo -cnvdXrpQg2EEdZaFJ76SmFMFoAnMd9LkuSdzt0P28nOlXVn/KDFp2ea8ROUUaM55 
-oGjo6Cj7i9h5fEnuAEE2Gcepjp9DRjJRIuwAxcihEcQSxzv4mOHqwMuCk6dpOG5S -MgVoCMiWz/9vn9U+Vyn5cjTzLgbmEQPVm5BL57QfPUhFW8cAMR5NeIeizLSpiBZQ -+RvzW/S2T+s8Cc0GgUjgiAmOLRCVMLTJ+jv1KvWFzu762POqXpreTD9UGLHnUvxI -RbhEgxj8p4169CeJSa0A19U6pFWFsZU2MLJkjHTIGlpzk5Vg5qzMyybcbk9wQQZ/ -CMOg5pVaCZHyTUwrFxKF51oIv9a/tuQSe/ryj8GIj7t0mq0+7klvEn1a6wrkSr73 -FzMNaEm4eLRVWYbHj8m4314vvaDjtUXCcMDRLb8j3fjyrcPPTkbO99rt1jVfU5wS -Ji7tVksGrTIHHlWkqZdbPhfZyTBIG34FjtjSClNVsOBeX+VqUuku8uQaM/9iVNZS -QamZuURGQ1x5+XHMjUQpoqAII+zXegJ1RiVfequYcF7F0bermVVVGdb/Ly2yNH1F -O5/LKKZ32+d5sm4= +MIIDDzCCAfegAwIBAgIUBUa66W9WhdTxm3BiIlfjGN4HjLkwDQYJKoZIhvcNAQEL +BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTI1MDIwMzAyMzIwOFoXDTI1MDMw +NTAyMzIwOFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF +AAOCAQ8AMIIBCgKCAQEAslbXVimtI9/ndG2vPKt7VXKF6cWegxH9Jkjn+2O1G8ZQ +8cQKLlS7zfqLN7VXVOIVtun1/ZK/saW7Ce2usB21VPDKAWGLtAZiIb5lhT25sljU +u+OLlaSqMD4rMdYgr6rfNnszDQfZ/9J4Jt5WZU3GZj0UguXHfUg/fVprRYmM4zxy +9kGxuERErDz3mISkLKkA2Xgo+1Y4aCTTmLCZYF5F1DitC39dB6efd98Q+Gz7eI1R +fzlGYaEq4ISrIy1h8UnLQmVoNfxTSbuZJp9kmTHqefynxWQQDlfLJ4yeLB4rOMlQ +ZOE/829Pc0nFZSLoeha2f9S6CQ1nFIhGNwQuJYOhTwIDAQABo1kwVzAUBgNVHREE +DTALgglsb2NhbGhvc3QwCwYDVR0PBAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMB +MB0GA1UdDgQWBBTD07Fp8N4xue7MMVrlI77W8Hpi1zANBgkqhkiG9w0BAQsFAAOC +AQEAjl4O2sDgEoe26OaxA8ICwemoaQh/uobVSwWejtNgrId8V8g8HQFNt3M3qbPk +mi8ymwmbs9NuiDK8DpWrIiXIT/22DUFJ/mfNlRB5FBK0ACwWfLs+lDrqD+Xdz/FP +ZT07xFgByrSXkBz3Av1Zrm8PzjLAuA7Ar/XYnkdwwmlNujFx+W92gSkGUduPE51Y +qCO0pFoG6TVhMv6kIQqqefblDv32yiMB+Yki/xYiR/6kRfOim8oXbwsZZvI1/QI6 +76Pyw57+2ZO9hREZRRuhtz0Ku0BFOeFI2/K2D+RDGQZ8mEfO6z/OQ3D1lB+apYDa +1uhrG0/cbVccEQDqfjiliHJuEQ== -----END CERTIFICATE----- From 771e358f84c3200e49ac317854ecb38b26867e04 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 14 Feb 2025 12:47:33 -0800 Subject: [PATCH 07/26] make sigs match --- cloudpathlib/http/httppath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index 
96e16033..9f2d2d8d 100644 --- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -56,14 +56,14 @@ def _no_prefix_no_drive(self) -> str: # netloc appears in anchor and drive for httppath; so don't double count return self._str[len(self.anchor) - 1 :] - def is_dir(self) -> bool: + def is_dir(self, follow_symlinks: bool = True) -> bool: if not self.exists(): return False # Use client default to iden return self.client.dir_matcher(str(self)) - def is_file(self) -> bool: + def is_file(self, follow_symlinks: bool = True) -> bool: if not self.exists(): return False From a1820f763d23115bfca1a118f3e4b92fa54d904b Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 14 Feb 2025 12:51:32 -0800 Subject: [PATCH 08/26] Add parsed_url --- cloudpathlib/http/httppath.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index 9f2d2d8d..305f8694 100644 --- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -1,10 +1,8 @@ -from pathlib import PurePosixPath -from typing import Any, Tuple, Union, Optional - import os -from pathlib import Path +from pathlib import Path, PurePosixPath from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING +from typing import Any, Tuple, TYPE_CHECKING, Union, Optional +import urllib from ..cloudpath import CloudPath, NoStatError, register_path_class @@ -42,6 +40,10 @@ def _dispatch_to_path(self, func: str, *args, **kwargs) -> Any: else: return sup + @property + def parsed_url(self) -> urllib.parse.ParseResult: + return self._url + @property def drive(self) -> str: # For HTTP paths, no drive; use .anchor for scheme + netloc From 5792061079d86caf199460046f8ea3b22e0374d4 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 14 Feb 2025 14:32:25 -0800 Subject: [PATCH 09/26] Add tests for verb methods --- cloudpathlib/http/httpclient.py | 10 ++++++++-- cloudpathlib/http/httppath.py | 19 ++++++++++++------ 
tests/conftest.py | 10 ++++++++++ tests/http_fixtures.py | 10 ++++++++++ tests/test_http.py | 34 ++++++++++++++++++++++++++++++++- 5 files changed, 74 insertions(+), 9 deletions(-) diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index b9febac7..f36b4d95 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -1,4 +1,5 @@ from datetime import datetime +import http import os import re import urllib.request @@ -156,10 +157,15 @@ def _simple_links(html: str) -> Iterable[str]: for match in parser(response) ) - def request(self, url: HttpPath, method: str, **kwargs) -> None: + def request( + self, url: HttpPath, method: str, **kwargs + ) -> Tuple[http.client.HTTPResponse, bytes]: request = urllib.request.Request(url.as_url(), method=method, **kwargs) with self.opener.open(request) as response: - return response + # eager read of response content, which is not available + # after the connection is closed when we exit the + # context manager. 
+ return response, response.read() HttpClient.HttpPath = HttpClient.CloudPath # type: ignore diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index 305f8694..089b5911 100644 --- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -1,3 +1,4 @@ +import http import os from pathlib import Path, PurePosixPath from tempfile import TemporaryDirectory @@ -8,7 +9,7 @@ if TYPE_CHECKING: - from .httpclient import HttpClient + from .httpclient import HttpClient, HttpsClient @register_path_class("http") @@ -125,22 +126,28 @@ def name(self) -> str: def parents(self) -> Tuple["HttpPath", ...]: return super().parents + (self._new_cloudpath(""),) - def get(self, **kwargs): + def get(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]: + """Issue a get request with `urllib.request.Request`""" return self.client.request(self, "GET", **kwargs) - def put(self, **kwargs): + def put(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]: + """Issue a put request with `urllib.request.Request`""" return self.client.request(self, "PUT", **kwargs) - def post(self, **kwargs): + def post(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]: + """Issue a post request with `urllib.request.Request`""" return self.client.request(self, "POST", **kwargs) - def delete(self, **kwargs): + def delete(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]: + """Issue a delete request with `urllib.request.Request`""" return self.client.request(self, "DELETE", **kwargs) - def head(self, **kwargs): + def head(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]: + """Issue a head request with `urllib.request.Request`""" return self.client.request(self, "HEAD", **kwargs) @register_path_class("https") class HttpsPath(HttpPath): cloud_prefix: str = "https://" + client: "HttpsClient" diff --git a/tests/conftest.py b/tests/conftest.py index e5c47211..1307036e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -569,6 +569,7 @@ def 
https_rig(request, assets_dir, https_server): # noqa: F811 local_s3_rig, local_gs_rig, http_rig, + https_rig, ], ) @@ -580,3 +581,12 @@ def https_rig(request, assets_dir, https_server): # noqa: F811 custom_s3_rig, ], ) + +# run some http-specific tests on http and https +http_like_rig = fixture_union( + "http_like_rig", + [ + http_rig, + https_rig, + ], +) diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index c3e7b3f9..495fb46e 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -50,6 +50,16 @@ def do_DELETE(self): self.end_headers() + def do_POST(self): + # roundtrip any posted JSON data for testing + content_length = int(self.headers["Content-Length"]) + post_data = self.rfile.read(content_length) + self.send_response(200) + self.send_header("Content-type", "application/json") + self.send_header("Content-Length", self.headers["Content-Length"]) + self.end_headers() + self.wfile.write(post_data) + def _http_server( root_dir, port, hostname="localhost", use_ssl=False, certfile=None, keyfile=None, threaded=True diff --git a/tests/test_http.py b/tests/test_http.py index d75c523f..f5f4b317 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -2,7 +2,7 @@ def test_https(https_rig: CloudProviderTestRig): - """Basic tests for https; we run the full suite against the http_rig""" + """Basic tests for https""" existing_file = https_rig.create_cloud_path("dir_0/file0_0.txt") # existence and listing @@ -34,3 +34,35 @@ def test_https(https_rig: CloudProviderTestRig): # metadata assert existing_file.stat().st_mtime != 0 + + +def test_http_verbs(http_like_rig: CloudProviderTestRig): + """Test that the http verbs work""" + p = http_like_rig.create_cloud_path("dir_0/file0_0.txt") + + # test put + p.put(data="Hello from 0".encode("utf-8"), headers={"Content-Type": "text/plain"}) + + # test get + resp, data = p.get() + assert resp.status == 200 + assert data.decode("utf-8") == "Hello from 0" + + # post + import json + + post_payload = {"key": 
"value"} + resp, data = p.post( + data=json.dumps(post_payload).encode(), headers={"Content-Type": "application/json"} + ) + assert resp.status == 200 + assert json.loads(data.decode("utf-8")) == post_payload + + # head + resp, data = p.head() + assert resp.status == 200 + assert data == b"" + + # delete + p.delete() + assert not p.exists() From 9fb57271c5447af87d4396d617a6c1a96e8a8760 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 14 Feb 2025 14:34:02 -0800 Subject: [PATCH 10/26] lint --- cloudpathlib/http/httpclient.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index f36b4d95..058f42e2 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -163,7 +163,7 @@ def request( request = urllib.request.Request(url.as_url(), method=method, **kwargs) with self.opener.open(request) as response: # eager read of response content, which is not available - # after the connection is closed when we exit the + # after the connection is closed when we exit the # context manager. 
return response, response.read() From 9d2aaf169e492cc881e319018ec94fcb881e39fc Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 14 Feb 2025 14:39:11 -0800 Subject: [PATCH 11/26] test parsed_url --- cloudpathlib/http/__init__.py | 6 ++++-- tests/test_http.py | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cloudpathlib/http/__init__.py b/cloudpathlib/http/__init__.py index 5ad785be..ccf7452e 100644 --- a/cloudpathlib/http/__init__.py +++ b/cloudpathlib/http/__init__.py @@ -1,7 +1,9 @@ -from .httpclient import HttpClient -from .httppath import HttpPath +from .httpclient import HttpClient, HttpsClient +from .httppath import HttpPath, HttpsPath __all__ = [ "HttpClient", "HttpPath", + "HttpsClient", + "HttpsPath", ] diff --git a/tests/test_http.py b/tests/test_http.py index f5f4b317..e8b8ab22 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -66,3 +66,11 @@ def test_http_verbs(http_like_rig: CloudProviderTestRig): # delete p.delete() assert not p.exists() + + +def test_http_parsed_url(http_like_rig: CloudProviderTestRig): + """Test that the parsed_url property works""" + p = http_like_rig.create_cloud_path("dir_0/file0_0.txt") + assert p.parsed_url.scheme == http_like_rig.cloud_prefix.split("://")[0] + assert p.parsed_url.netloc == http_like_rig.drive + assert p.parsed_url.path == str(p).split(http_like_rig.drive)[1] From 020da523a2e1d8314a1280f64c660235239515c9 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 15 Feb 2025 14:26:03 -1000 Subject: [PATCH 12/26] test preserved properties --- cloudpathlib/http/httppath.py | 9 +++++- tests/test_http.py | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/cloudpathlib/http/httppath.py b/cloudpathlib/http/httppath.py index 089b5911..df3ce51f 100644 --- a/cloudpathlib/http/httppath.py +++ b/cloudpathlib/http/httppath.py @@ -30,6 +30,12 @@ def __init__( else PurePosixPath(f"/{self._url.path}") ) + @property + def _local(self) -> Path: 
+ """Cached local version of the file.""" + # remove params, query, fragment to get local path + return self.client._local_cache_dir / self._url.path.lstrip("/") + def _dispatch_to_path(self, func: str, *args, **kwargs) -> Any: sup = super()._dispatch_to_path(func, *args, **kwargs) @@ -81,7 +87,8 @@ def touch(self, exist_ok: bool = True) -> None: raise FileExistsError(f"File already exists: {self}") raise NotImplementedError( - "Touch not implemented for existing HTTP files since we can't update the modified time." + "Touch not implemented for existing HTTP files since we can't update the modified time; " + "use `put()` or write to the file instead." ) else: empty_file = Path(TemporaryDirectory().name) / "empty_file.txt" diff --git a/tests/test_http.py b/tests/test_http.py index e8b8ab22..4dbf30a2 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -1,3 +1,5 @@ +import urllib + from tests.conftest import CloudProviderTestRig @@ -74,3 +76,53 @@ def test_http_parsed_url(http_like_rig: CloudProviderTestRig): assert p.parsed_url.scheme == http_like_rig.cloud_prefix.split("://")[0] assert p.parsed_url.netloc == http_like_rig.drive assert p.parsed_url.path == str(p).split(http_like_rig.drive)[1] + + +def test_http_url_decorations(http_like_rig: CloudProviderTestRig): + def _test_preserved_properties(base_url, returned_url): + parsed_base = urllib.parse.urlparse(str(base_url)) + parsed_returned = urllib.parse.urlparse(str(returned_url)) + + assert parsed_returned.scheme == parsed_base.scheme + assert parsed_returned.netloc == parsed_base.netloc + assert parsed_returned.username == parsed_base.username + assert parsed_returned.password == parsed_base.password + assert parsed_returned.hostname == parsed_base.hostname + assert parsed_returned.port == parsed_base.port + + p = http_like_rig.create_cloud_path("dir_0/file0_0.txt") + p.write_text("Hello!") + + # add some properties to the url + new_url = p.parsed_url._replace( + params="param=value", 
query="query=value&query2=value2", fragment="fragment-value" + ) + p = http_like_rig.path_class(urllib.parse.urlunparse(new_url)) + + # operations that should preserve properties of the original url and need to hit the server + # glob, iterdir, walk + _test_preserved_properties(p, next(p.parent.glob("*.txt"))) + _test_preserved_properties(p, next(p.parent.iterdir())) + _test_preserved_properties(p, next(p.parent.walk())[0]) + + # rename and replace? + new_location = p.with_name("other_file.txt") + _test_preserved_properties(p, p.rename(new_location)) + _test_preserved_properties(p, new_location.replace(p)) + + # operations that should preserve properties of the original url and don't hit the server + # so that we can add some other properties (e.g., username, password) + new_url = p.parsed_url._replace(netloc="user:pass@example.com:8000") + p = http_like_rig.path_class(urllib.parse.urlunparse(new_url)) + + # parent + _test_preserved_properties(p, p.parent) + + # joining / and joinpath + _test_preserved_properties(p, p.parent / "other_file.txt") + _test_preserved_properties(p, p.parent.joinpath("other_file.txt")) + + # with_name, with_suffix, with_stem + _test_preserved_properties(p, p.with_name("other_file.txt")) + _test_preserved_properties(p, p.with_suffix(".txt")) + _test_preserved_properties(p, p.with_stem("other_file")) From 71a73a512e136f27b1d2762204e0844ad6e37bb4 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 15 Feb 2025 15:08:33 -1000 Subject: [PATCH 13/26] spread out ports; fix warnings --- tests/conftest.py | 2 +- tests/http_fixtures.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1307036e..9ae78ae7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -525,7 +525,7 @@ def https_rig(request, assets_dir, https_server): # noqa: F811 # copy test assets shutil.copytree(assets_dir, server_dir / test_dir) - skip_verify_ctx = ssl.SSLContext() + skip_verify_ctx = 
ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) skip_verify_ctx.check_hostname = False skip_verify_ctx.load_verify_locations(utilities_dir / "insecure-test.pem") diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index 495fb46e..249382c0 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -3,6 +3,7 @@ from http.server import HTTPServer, SimpleHTTPRequestHandler import os from pathlib import Path +import random import shutil import ssl import threading @@ -94,7 +95,7 @@ def start_server(): for _ in range(10): try: if use_ssl: - req_context = ssl.SSLContext() + req_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) req_context.check_hostname = False req_context.verify_mode = ssl.CERT_NONE else: @@ -111,7 +112,7 @@ def start_server(): @fixture(scope="module") def http_server(tmp_path_factory, worker_id): - port = 9077 + ( + port = 9077 + random.randint(0, 1000) + ( int(worker_id.lstrip("gw")) if worker_id != "master" else 0 ) # don't collide if tests running in parallel with multiple servers @@ -129,7 +130,7 @@ def http_server(tmp_path_factory, worker_id): @fixture(scope="module") def https_server(tmp_path_factory, worker_id): - port = 4443 + ( + port = 4443 + random.randint(0, 1000) + ( int(worker_id.lstrip("gw")) if worker_id != "master" else 0 ) # don't collide if tests running in parallel with multiple servers From 93797a0518158dd9288fdc3cd71a911bb59b05e7 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 15 Feb 2025 15:10:20 -1000 Subject: [PATCH 14/26] lint --- tests/http_fixtures.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index 249382c0..fce2ead5 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -112,8 +112,10 @@ def start_server(): @fixture(scope="module") def http_server(tmp_path_factory, worker_id): - port = 9077 + random.randint(0, 1000) + ( - int(worker_id.lstrip("gw")) if worker_id != "master" else 0 + port = ( + 9077 + + 
random.randint(0, 1000) + + (int(worker_id.lstrip("gw")) if worker_id != "master" else 0) ) # don't collide if tests running in parallel with multiple servers server_dir = tmp_path_factory.mktemp("server_files").resolve() @@ -130,8 +132,10 @@ def http_server(tmp_path_factory, worker_id): @fixture(scope="module") def https_server(tmp_path_factory, worker_id): - port = 4443 + random.randint(0, 1000) + ( - int(worker_id.lstrip("gw")) if worker_id != "master" else 0 + port = ( + 4443 + + random.randint(0, 1000) + + (int(worker_id.lstrip("gw")) if worker_id != "master" else 0) ) # don't collide if tests running in parallel with multiple servers server_dir = tmp_path_factory.mktemp("server_files").resolve() From a609d0a846c69857eb8512dd7372d5a4a81ec414 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 17 Feb 2025 09:48:34 -1000 Subject: [PATCH 15/26] fix full_match --- cloudpathlib/cloudpath.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index b5cade32..f7621c5b 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -938,6 +938,9 @@ def full_match(self, pattern: str, case_sensitive: Optional[bool] = None) -> boo # strip scheme from start of pattern before testing if pattern.startswith(self.anchor + self.drive): pattern = pattern[len(self.anchor + self.drive) :] + elif pattern.startswith(self.anchor): + # for http paths, keep leading slash + pattern = pattern[len(self.anchor) - 1 :] # remove drive, which is kept on normal dispatch to pathlib return PurePosixPath(self._no_prefix_no_drive).full_match( # type: ignore[attr-defined] From 8a06929b8fd4dcfe335a886d5fa841ce2f85c8b1 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 17 Feb 2025 11:57:26 -1000 Subject: [PATCH 16/26] sleepy upload test --- tests/test_cloudpath_upload_copy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cloudpath_upload_copy.py b/tests/test_cloudpath_upload_copy.py index 06710590..29e664f7 100644 --- 
a/tests/test_cloudpath_upload_copy.py +++ b/tests/test_cloudpath_upload_copy.py @@ -70,6 +70,7 @@ def test_upload_from_file(rig, upload_assets_dir): p.upload_from(upload_assets_dir / "upload_1.txt") # to file, file exists and is newer; overwrite + sleep(1.1) p.write_text("even newer") sleep(1.1) p.upload_from(upload_assets_dir / "upload_1.txt", force_overwrite_to_cloud=True) From fd0ab3f74e58c041fb58ffadbfa3fc2aba622d78 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 17 Feb 2025 12:06:25 -1000 Subject: [PATCH 17/26] docs wip --- docs/docs/http.md | 12 ++++++++++++ docs/mkdocs.yml | 1 + 2 files changed, 13 insertions(+) create mode 100644 docs/docs/http.md diff --git a/docs/docs/http.md b/docs/docs/http.md new file mode 100644 index 00000000..0a21fe71 --- /dev/null +++ b/docs/docs/http.md @@ -0,0 +1,12 @@ +url docs + - assume params, query, fragment are url-specific, so they are not preserved when joining, etc. + - netloc, scheme, port, username, password and hostname are preserved when joining, etc. 
+ - url properties are available on `.parsed_url` + - your server needs to support DELETE and PUT for a number of operations to work + - expose http verbs as methods on the path object + - How to do auth examples + + + + + diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5d710441..4d83919c 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -18,6 +18,7 @@ nav: - Home: "index.md" - Why cloudpathlib?: "why_cloudpathlib.ipynb" - Authentication: "authentication.md" + - HTTP URLs: "http.md" - Caching: "caching.ipynb" - AnyPath: "anypath-polymorphism.md" - Other Client settings: "other_client_settings.md" From cf006300520befe7dc87f1f3af3435ad6f42acff Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 17 Feb 2025 15:02:06 -1000 Subject: [PATCH 18/26] Update docs --- cloudpathlib/http/httpclient.py | 24 ++++- docs/docs/http.md | 157 ++++++++++++++++++++++++++++++-- docs/mkdocs.yml | 6 +- 3 files changed, 177 insertions(+), 10 deletions(-) diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index 058f42e2..ad716916 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -29,7 +29,24 @@ def __init__( auth: Optional[urllib.request.BaseHandler] = None, custom_list_page_parser: Optional[Callable[[str], Iterable[str]]] = None, custom_dir_matcher: Optional[Callable[[str], bool]] = None, + write_file_http_method: Optional[str] = "PUT", ): + """Class constructor. + + Args: + file_cache_mode (Optional[Union[str, FileCacheMode]]): How often to clear the file cache; see + [the caching docs](https://cloudpathlib.drivendata.org/stable/caching/) for more information + about the options in cloudpathlib.eums.FileCacheMode. + local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache + for downloaded files. If None, will use a temporary directory. Default can be set with + the `CLOUDPATHLIB_LOCAL_CACHE_DIR` environment variable. 
+ content_type_method (Optional[Callable]): Function to call to guess media type (mimetype) when + uploading files. Defaults to `mimetypes.guess_type`. + auth (Optional[urllib.request.BaseHandler]): Authentication handler to use for the client. Defaults to None, which will use the default handler. + custom_list_page_parser (Optional[Callable[[str], Iterable[str]]]): Function to call to parse pages that list directories. Defaults to looking for `<a>` tags with `href`. + custom_dir_matcher (Optional[Callable[[str], bool]]): Function to call to identify a url that is a directory. Defaults to a lambda that checks if the path ends with a `/`. + write_file_http_method (Optional[str]): HTTP method to use when writing files. Defaults to "PUT", but some servers may want "POST". + """ super().__init__(file_cache_mode, local_cache_dir, content_type_method) self.auth = auth @@ -44,6 +61,8 @@ def __init__( custom_dir_matcher if custom_dir_matcher is not None else lambda x: x.endswith("/") ) + self.write_file_http_method = write_file_http_method + def _get_metadata(self, cloud_path: HttpPath) -> dict: with self.opener.open(cloud_path.as_url()) as response: last_modified = response.headers.get("Last-Modified", None) @@ -123,7 +142,10 @@ def _upload_file(self, local_path: Union[str, os.PathLike], cloud_path: HttpPath with open(local_path, "rb") as file_data: request = urllib.request.Request( - cloud_path.as_url(), data=file_data.read(), method="PUT", headers=headers + cloud_path.as_url(), + data=file_data.read(), + method=self.write_file_http_method, + headers=headers, ) with self.opener.open(request) as response: if response.status != 201 and response.status != 200: diff --git a/docs/docs/http.md b/docs/docs/http.md index 0a21fe71..79476f84 100644 --- a/docs/docs/http.md +++ b/docs/docs/http.md @@ -1,12 +1,153 @@ -url docs - - assume params, query, fragment are url-specific, so they are not preserved when joining, etc.
- - netloc, scheme, port, username, password and hostname are preserved when joining, etc. - - url properties are available on `.parsed_url` - - your server needs to support DELETE and PUT for a number of operations to work - - expose http verbs as methods on the path object - - How to do auth examples +# HTTP Support in CloudPathLib +We support `http://` and `https://` URLs as `CloudPath`, but these behave somewhat differently from typical cloud provider URIs (e.g., `s3://`, `gs://`) or local file paths. This document describes those differences, caveats, and the additional configuration options available. +## How HTTP Paths Differ + +- HTTP servers are not necessarily structured like file systems. Operations such as listing directories, removing files, or creating folders depend on whether the server supports them. +- For many operations (e.g., uploading, removing files), this implementation relies on specific HTTP verbs like `PUT` or `DELETE`. If the server does not allow these verbs, those operations will fail. +- While some cloud storage backends (e.g., AWS S3) provide robust directory emulation, a basic HTTP server may only partially implement these concepts (e.g., listing a directory might just be an HTML page with links). + +## URL Modifiers (Params, Query Strings, and Fragments) + +An HTTP URL can include: +- **Params** (rarely used, often a semicolon-based suffix to the path) +- **Query strings** (e.g., `?foo=bar`) +- **Fragments** (the `#anchor` portion) + +### Preservation and Joining Behavior + +- **Params, query, and fragment** are part of the URL, but be aware that when you join paths (e.g., `my_path / "subdir"`), these modifiers will be discarded unless you explicitly preserve them, since we operate under the assumption that these modifiers are tied to the specific URL. +- **netloc (username, password, hostname, port) and scheme** are preserved when joining. 
They are derived from the main portion of the URL (e.g., `http://username:password@www.example.com:8080`). + +### The `HttpPath.anchor` Property + +Because of naming conventions inherited from Python’s `pathlib`, the "anchor" in a CloudPath (e.g., `my_path.anchor`) refers to `:///`. It does **not** include the fragment portion of a URL (which is sometimes also called the "anchor" in HTML contexts). In other words, `.anchor` returns something like `https://www.example.com/`, not `...#fragment`. To get the fragment, use `my_path.parsed_url.fragment`. + +## Accessing URL components + +You can access the various components of a URL via the `HttpPath.parsed_url` property, which is a [`urllib.parse.ParseResult`](https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse) object. For example: + +```python +my_path = HttpPath("http://username:password@www.example.com:8080/path/to/resource?query=param#fragment") + +print(my_path.parsed_url.scheme) # "http" +print(my_path.parsed_url.netloc) # "username:password@www.example.com:8080" + +``` + +```mermaid +flowchart LR + + %% Define colors for each block + classDef scheme fill:#FFD700,stroke:#000,stroke-width:1px,color:#000 + classDef netloc fill:#ADD8E6,stroke:#000,stroke-width:1px,color:#000 + classDef path fill:#98FB98,stroke:#000,stroke-width:1px,color:#000 + classDef query fill:#EE82EE,stroke:#000,stroke-width:1px,color:#000 + classDef fragment fill:#FFB6C1,stroke:#000,stroke-width:1px,color:#000 + + A[".scheme:
https"]:::scheme + B[".netloc:
username:password\\@www.example.com:8080"]:::netloc + C[".path:
/path/to/resource"]:::path + D[".query:
query=param"]:::query + E[".fragment:
fragment"]:::fragment + + A --> B --> C --> D --> E +``` + +## Required HTTP Verbs + +Some operations require specific HTTP verbs. If your server does not support these verbs, the operation will fail. + +Your server needs to support these operations for them to succeed: +- If your server does not allow `DELETE`, you will not be able to remove files via `HttpPath.unlink()` or `HttpPath.remove()`. +- If your server does not allow `PUT` or `POST`, you won’t be able to upload files. +- By default, we use `PUT` for creating or replacing a file. If you need `POST` for uploads, you can override the behavior by passing `write_file_http_method="POST"` to the `HttpClient` constructor. + +### Accessing HTTP Verbs on the Path Object + +`HttpPath` exposes direct methods to perform the relevant HTTP verbs: +```python +response, content = my_path.get() # issues a GET +response, content = my_path.put() # issues a PUT +response, content = my_path.post() # issues a POST +response, content = my_path.delete() # issues a DELETE +response, content = my_path.head() # issues a HEAD +``` + +These methods are thin wrappers around the client’s underlying `request(...)` method. + +## Authentication + +By default, `HttpClient` will build a simple opener with `urllib.request.build_opener()`, which typically handles no or basic system-wide HTTP auth. 
However, you can pass a custom `BaseHandler` (e.g., an `HTTPBasicAuthHandler` or a `CookieJar`) to the `HttpClient` constructor: + +```python +import urllib.request + +auth_handler = urllib.request.HTTPBasicAuthHandler() +auth_handler.add_password( + realm="Some Realm", + uri="http://www.example.com", + user="username", + passwd="password" +) + +client = HttpClient(auth=auth_handler) +my_path = client.CloudPath("http://www.example.com/secret/data.txt") + +# Now GET requests will include basic auth headers +content = my_path.read_text() +``` + +This can be extended to more sophisticated authentication approaches (e.g., OAuth, custom headers) by providing your own `BaseHandler` or adding more handlers to the opener. There are examples on the internet of handlers for most common authentication schemes. + +## Directory Assumptions + +A key difference from other `CloudPath` implementations: +- By default, a URL is considered a directory if it **ends with a slash**. For example, `http://example.com/somedir/`. +- If you call `HttpPath.is_dir()`, it checks `my_url.endswith("/")` by default. You can override this with a custom function by passing `custom_dir_matcher` to `HttpClient`. + +### Listing the Contents of a Directory + +We attempt to parse directory listings by calling `GET` on the URL (which presumably returns an HTML directory index or a custom listing). Our default parser looks for `
` tags and yields them as child paths. You can override this logic with `custom_list_page_parser` if your server’s HTML or API returns a different listing format. For example: + +```python +def my_parser(html_content: str) -> Iterable[str]: + # for example, just get a with href and class "file-link" + # using beautifulsoup + soup = BeautifulSoup(html_content, "html.parser") + for link in soup.find_all("a", class_="file-link"): + yield link.get("href") + +client = HttpClient(custom_list_page_parser=my_parser) +my_dir = client.CloudPath("http://example.com/public/") + +for subpath, is_dir in my_dir.list_dir(recursive=False): + print(subpath, "dir" if is_dir else "file") +``` + +> **Note**: If your server doesn’t provide an HTML index or a suitable listing format, you will likely get an error. In that case, you must provide a custom parser or avoid directory-listing operations altogether. + +## HTTP or HTTPS + +We register separate classes internally (`HttpClient`/`HttpPath` for `http://`, `HttpsClient`/`HttpsPath` for `https://`). However, from a usage standpoint, you typically just do: + +```python +from cloudpathlib import AnyPath + +# AnyPath will automatically detect "http://" or "https://" +my_path = AnyPath("https://www.example.com/files/info.txt") +``` + +If you explicitly instantiate a `HttpClient`, it will handle `http://`. If you instantiate a `HttpsClient`, it will handle `https://`. But `AnyPath` can route to the correct client class automatically. + +## Additional Notes + +- **Caching**: This implementation uses the same local file caching mechanics as other CloudPathLib providers, controlled by `file_cache_mode` and `local_cache_dir`. However, for static HTTP servers, re-downloading or re-checking may not be as efficient as with typical cloud storages that return robust metadata. +- **“Move” or “Rename”**: The `_move_file` operation is implemented as an upload followed by a delete. This will fail if your server does not allow both `PUT` and `DELETE`. 
+ + + +We support HTTP URLs to links on the internet with a few caveats about how these may behave differently - diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 4d83919c..29743fb4 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -47,7 +47,11 @@ nav: markdown_extensions: - admonition - pymdownx.highlight - - pymdownx.superfences + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format - toc: permalink: True toc_depth: 3 From 2cee52b70e96e958e48e4222233c100bf5d98de4 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 17 Feb 2025 15:03:03 -1000 Subject: [PATCH 19/26] lint --- cloudpathlib/http/httpclient.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpathlib/http/httpclient.py b/cloudpathlib/http/httpclient.py index ad716916..1d1f2f64 100644 --- a/cloudpathlib/http/httpclient.py +++ b/cloudpathlib/http/httpclient.py @@ -31,7 +31,7 @@ def __init__( custom_dir_matcher: Optional[Callable[[str], bool]] = None, write_file_http_method: Optional[str] = "PUT", ): - """Class constructor. + """Class constructor. Args: file_cache_mode (Optional[Union[str, FileCacheMode]]): How often to clear the file cache; see From 81903c8f21b22785a46a754d109b6a64e4859657 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 19 Feb 2025 14:35:21 -1000 Subject: [PATCH 20/26] improve http docs --- docs/docs/http.md | 157 +++++++++++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 52 deletions(-) diff --git a/docs/docs/http.md b/docs/docs/http.md index 79476f84..dee77613 100644 --- a/docs/docs/http.md +++ b/docs/docs/http.md @@ -1,41 +1,60 @@ # HTTP Support in CloudPathLib -We support `http://` and `https://` URLs as `CloudPath`, but these behave somewhat differently from typical cloud provider URIs (e.g., `s3://`, `gs://`) or local file paths. This document describes those differences, caveats, and the additional configuration options available. 
+We support `http://` and `https://` URLs with `CloudPath`, but these behave somewhat differently from typical cloud provider URIs (e.g., `s3://`, `gs://`) or local file paths. This document describes those differences, caveats, and the additional configuration options available. -## How HTTP Paths Differ + > **Note:** We don't currently automatically detect `http` links to cloud storage providers (for example, `http://s3.amazonaws.com/bucket/key`) and treat those as `S3Path`, `GSPath`, etc. They will be treated as `HttpPath` objects. -- HTTP servers are not necessarily structured like file systems. Operations such as listing directories, removing files, or creating folders depend on whether the server supports them. -- For many operations (e.g., uploading, removing files), this implementation relies on specific HTTP verbs like `PUT` or `DELETE`. If the server does not allow these verbs, those operations will fail. -- While some cloud storage backends (e.g., AWS S3) provide robust directory emulation, a basic HTTP server may only partially implement these concepts (e.g., listing a directory might just be an HTML page with links). +## Basic Usage -## URL Modifiers (Params, Query Strings, and Fragments) +```python +from cloudpathlib import CloudPath -An HTTP URL can include: -- **Params** (rarely used, often a semicolon-based suffix to the path) -- **Query strings** (e.g., `?foo=bar`) -- **Fragments** (the `#anchor` portion) +# Create a path object +path = CloudPath("https://example.com/data/file.txt") -### Preservation and Joining Behavior +# Read file contents +text = path.read_text() +binary = path.read_bytes() -- **Params, query, and fragment** are part of the URL, but be aware that when you join paths (e.g., `my_path / "subdir"`), these modifiers will be discarded unless you explicitly preserve them, since we operate under the assumption that these modifiers are tied to the specific URL. 
-- **netloc (username, password, hostname, port) and scheme** are preserved when joining. They are derived from the main portion of the URL (e.g., `http://username:password@www.example.com:8080`). +# Get parent directory +parent = path.parent # https://example.com/data/ -### The `HttpPath.anchor` Property +# Join paths +subpath = path.parent / "other.txt" # https://example.com/data/other.txt -Because of naming conventions inherited from Python’s `pathlib`, the "anchor" in a CloudPath (e.g., `my_path.anchor`) refers to `:///`. It does **not** include the fragment portion of a URL (which is sometimes also called the "anchor" in HTML contexts). In other words, `.anchor` returns something like `https://www.example.com/`, not `...#fragment`. To get the fragment, use `my_path.parsed_url.fragment`. +# Check if file exists +if path.exists(): + print("File exists!") -## Accessing URL components +# Get file name and suffix +print(path.name) # "file.txt" +print(path.suffix) # ".txt" -You can access the various components of a URL via the `HttpPath.parsed_url` property, which is a [`urllib.parse.ParseResult`](https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse) object. For example: +# List directory contents (if server supports directory listings) +data_dir = CloudPath("https://example.com/data/") +for child_path in data_dir.iterdir(): + print(child_path) +``` -```python -my_path = HttpPath("http://username:password@www.example.com:8080/path/to/resource?query=param#fragment") +## How HTTP Paths Differ -print(my_path.parsed_url.scheme) # "http" -print(my_path.parsed_url.netloc) # "username:password@www.example.com:8080" + - HTTP servers are not necessarily structured like file systems. Operations such as listing directories, removing files, or creating folders depend on whether the server supports them. + - For many operations (e.g., uploading, removing files), this implementation relies on specific HTTP verbs like `PUT` or `DELETE`. 
If the server does not allow these verbs, those operations will fail. + - While some cloud storage backends (e.g., AWS S3) provide robust directory emulation, a basic HTTP server may only partially implement these concepts (e.g., listing a directory might just be an HTML page with links). + - HTTP URLs often include more than just a path, for example query strings, fragments, and other URL modifiers that are not part of the path. These are handled differently than with other cloud storage providers. + +## URL components +You can access the various components of a URL via the `HttpPath.parsed_url` property, which is a [`urllib.parse.ParseResult`](https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse) object. + +For example for the following URL: + +``` +https://username:password@www.example.com:8080/path/to/resource?query=param#fragment ``` +The components are: + ```mermaid flowchart LR @@ -46,27 +65,56 @@ flowchart LR classDef query fill:#EE82EE,stroke:#000,stroke-width:1px,color:#000 classDef fragment fill:#FFB6C1,stroke:#000,stroke-width:1px,color:#000 - A[".scheme:
https"]:::scheme - B[".netloc:
username:password\\@www.example.com:8080"]:::netloc - C[".path:
/path/to/resource"]:::path - D[".query:
query=param"]:::query - E[".fragment:
fragment"]:::fragment + A[".scheme
https"]:::scheme + B[".netloc
username:password\@www‎.example.com:8080"]:::netloc + C[".path
/path/to/resource"]:::path + D[".query
query=param"]:::query + E[".fragment
fragment"]:::fragment A --> B --> C --> D --> E ``` -## Required HTTP Verbs +To access the components of the URL, you can use the `HttpPath.parsed_url` property: + +```python +my_path = HttpPath("http://username:password@www.example.com:8080/path/to/resource?query=param#fragment") -Some operations require specific HTTP verbs. If your server does not support these verbs, the operation will fail. +print(my_path.parsed_url.scheme) # "http" +print(my_path.parsed_url.netloc) # "username:password@www.example.com:8080" +print(my_path.parsed_url.path) # "/path/to/resource" +print(my_path.parsed_url.query) # "query=param" +print(my_path.parsed_url.fragment) # "fragment" + +# extra properties that are subcomponents of `netloc` +print(my_path.parsed_url.username) # "username" +print(my_path.parsed_url.password) # "password" +print(my_path.parsed_url.hostname) # "www.example.com" +print(my_path.parsed_url.port) # 8080 +``` + +### Preservation and Joining Behavior + + - **Params, query, and fragment** are part of the URL, but be aware that when you perform operations that return a new path (e.g., joining `my_path / "subdir"`, walking directories, fetching parents, etc.), these modifiers will be discarded unless you explicitly preserve them, since we operate under the assumption that these modifiers are tied to the specific URL. + - **netloc (including the subcomponents, username, password, hostname, port) and scheme** are preserved when joining. They are derived from the main portion of the URL (e.g., `http://username:password@www.example.com:8080`). + +### The `HttpPath.anchor` Property + +Because of naming conventions inherited from Python's `pathlib`, the "anchor" in a CloudPath (e.g., `my_path.anchor`) refers to `<scheme>://<netloc>/`. It does **not** include the "fragment" portion of a URL (which is sometimes also called the "anchor" in HTML contexts since it can refer to a `<a>` tag).
In other words, `.anchor` returns something like `https://www.example.com/`, not `...#fragment`. To get the fragment, use `my_path.parsed_url.fragment`. + +## Required serverside HTTP verbs support + +Some operations require that the server support specific HTTP verbs. If your server does not support these verbs, the operation will fail. Your server needs to support these operations for them to succeed: -- If your server does not allow `DELETE`, you will not be able to remove files via `HttpPath.unlink()` or `HttpPath.remove()`. -- If your server does not allow `PUT` or `POST`, you won’t be able to upload files. -- By default, we use `PUT` for creating or replacing a file. If you need `POST` for uploads, you can override the behavior by passing `write_file_http_method="POST"` to the `HttpClient` constructor. -### Accessing HTTP Verbs on the Path Object + - If your server does not allow `DELETE`, you will not be able to remove files via `HttpPath.unlink()` or `HttpPath.remove()`. + - If your server does not allow `PUT` (or `POST`, see next bullet), you won't be able to upload files. + - By default, we use `PUT` for creating or replacing a file. If you need `POST` for uploads, you can override the behavior by passing `write_file_http_method="POST"` to the `HttpClient` constructor. + +### Making requests with the `HttpPath` object + +`HttpPath` and `HttpsPath` expose direct methods to perform the relevant HTTP verbs: -`HttpPath` exposes direct methods to perform the relevant HTTP verbs: ```python response, content = my_path.get() # issues a GET response, content = my_path.put() # issues a PUT @@ -75,11 +123,11 @@ response, content = my_path.delete() # issues a DELETE response, content = my_path.head() # issues a HEAD ``` -These methods are thin wrappers around the client’s underlying `request(...)` method.
+These methods are thin wrappers around the client's underlying `request(...)` method, so you can pass any arguments that [`urllib.request.Request`](https://docs.python.org/3/library/urllib.request.html#urllib.request.Request) supports; for example, you can pass content via `data=` and headers via `headers=`. ## Authentication -By default, `HttpClient` will build a simple opener with `urllib.request.build_opener()`, which typically handles no or basic system-wide HTTP auth. However, you can pass a custom `BaseHandler` (e.g., an `HTTPBasicAuthHandler` or a `CookieJar`) to the `HttpClient` constructor: +By default, `HttpClient` will build a simple opener with `urllib.request.build_opener()`, which typically handles no or basic system-wide HTTP auth. However, you can pass an implementation of `urllib.request.BaseHandler` (e.g., an `HTTPBasicAuthHandler`) to the `HttpClient` or `HttpsClient` constructors to handle authentication: ```python import urllib.request @@ -99,17 +147,18 @@ my_path = client.CloudPath("http://www.example.com/secret/data.txt") content = my_path.read_text() ``` -This can be extended to more sophisticated authentication approaches (e.g., OAuth, custom headers) by providing your own `BaseHandler` or adding more handlers to the opener. There are examples on the internet of handlers for most common authentication schemes. +This can be extended to more sophisticated authentication approaches (e.g., OAuth, custom headers) by providing your own `BaseHandler` implementation. There are examples on the internet of handlers for most common authentication schemes. ## Directory Assumptions -A key difference from other `CloudPath` implementations: -- By default, a URL is considered a directory if it **ends with a slash**. For example, `http://example.com/somedir/`. -- If you call `HttpPath.is_dir()`, it checks `my_url.endswith("/")` by default. You can override this with a custom function by passing `custom_dir_matcher` to `HttpClient`.
+Directories are handled differently from other `CloudPath` implementations: + + - By default, a URL is considered a directory if it **ends with a slash**. For example, `http://example.com/somedir/`. + - If you call `HttpPath.is_dir()`, it checks `my_url.endswith("/")` by default. You can override this with a custom function by passing `custom_dir_matcher` to `HttpClient`. This will allow you to implement custom logic for determining if a URL is a directory. The `custom_dir_matcher` will receive a string representing the URL, so if you need to interact with the server, you will need to make those requests within your `custom_dir_matcher` implementation. ### Listing the Contents of a Directory -We attempt to parse directory listings by calling `GET` on the URL (which presumably returns an HTML directory index or a custom listing). Our default parser looks for `<a>` tags and yields them as child paths. You can override this logic with `custom_list_page_parser` if your server’s HTML or API returns a different listing format. For example: +We attempt to parse directory listings by calling `GET` on the directory URL (which presumably returns an HTML page that has a directory index). Our default parser looks for `<a>` tags and yields them, assuming they are children. You can override this logic with `custom_list_page_parser` if your server's HTML or API returns a different listing format. For example: ```python def my_parser(html_content: str) -> Iterable[str]: @@ -126,28 +175,32 @@ for subpath, is_dir in my_dir.list_dir(recursive=False): print(subpath, "dir" if is_dir else "file") ``` -> **Note**: If your server doesn’t provide an HTML index or a suitable listing format, you will likely get an error. In that case, you must provide a custom parser or avoid directory-listing operations altogether.
+**Note**: If your server doesn't provide an HTML index or a suitable listing format that we can parse, you will see: + +``` +NotImplementedError("Unable to parse response as a listing of files; please provide a custom parser as `custom_list_page_parser`.") +``` + +In that case, you must provide a custom parser or avoid directory-listing operations altogether. ## HTTP or HTTPS -We register separate classes internally (`HttpClient`/`HttpPath` for `http://`, `HttpsClient`/`HttpsPath` for `https://`). However, from a usage standpoint, you typically just do: +There are separate classes for `HttpClient`/`HttpPath` for `http://` and `HttpsClient`/`HttpsPath` for `https://`. However, from a usage standpoint, you can use either `CloudPath` or `AnyPath` to dispatch to the right subclass. ```python -from cloudpathlib import AnyPath +from cloudpathlib import AnyPath, CloudPath -# AnyPath will automatically detect "http://" or "https://" +# AnyPath will automatically detect "http://" or "https://" (or local file paths) my_path = AnyPath("https://www.example.com/files/info.txt") + +# CloudPath will dispatch to the correct subclass +my_path = CloudPath("https://www.example.com/files/info.txt") ``` -If you explicitly instantiate a `HttpClient`, it will handle `http://`. If you instantiate a `HttpsClient`, it will handle `https://`. But `AnyPath` can route to the correct client class automatically. +If you explicitly instantiate a `HttpClient`, it will only handle `http://` paths. If you instantiate a `HttpsClient`, it will only handle `https://` paths. But `AnyPath` and `CloudPath` will route to the correct client class automatically. ## Additional Notes -- **Caching**: This implementation uses the same local file caching mechanics as other CloudPathLib providers, controlled by `file_cache_mode` and `local_cache_dir`. However, for static HTTP servers, re-downloading or re-checking may not be as efficient as with typical cloud storages that return robust metadata. 
-- **“Move” or “Rename”**: The `_move_file` operation is implemented as an upload followed by a delete. This will fail if your server does not allow both `PUT` and `DELETE`. - - - -We support HTTP URLs to links on the internet with a few caveats about how these may behave differently - + - **Caching**: This implementation uses the same local file caching mechanics as other CloudPathLib providers, controlled by `file_cache_mode` and `local_cache_dir`. However, for static HTTP servers, re-downloading or re-checking may not be as efficient as with typical cloud storages that return robust metadata. + - **"Move" or "Rename"**: The `_move_file` operation is implemented as an upload followed by a delete. This will fail if your server does not allow both `PUT` and `DELETE`. From acc93a0c9e902337e7017f395664698432705404 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 19 Feb 2025 14:40:27 -1000 Subject: [PATCH 21/26] add table --- README.md | 173 +++++++++++++++++++------------------ docs/make_support_table.py | 1 + 2 files changed, 92 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index 2013f74e..5c50e294 100644 --- a/README.md +++ b/README.md @@ -124,88 +124,97 @@ list(root_dir.glob('**/*.txt')) Most methods and properties from `pathlib.Path` are supported except for the ones that don't make sense in a cloud context. There are a few additional methods or properties that relate to specific cloud services or specifically for cloud paths. 
-| Methods + properties | `AzureBlobPath` | `S3Path` | `GSPath` | -|:-----------------------|:------------------|:-----------|:-----------| -| `absolute` | ✅ | ✅ | ✅ | -| `anchor` | ✅ | ✅ | ✅ | -| `as_uri` | ✅ | ✅ | ✅ | -| `drive` | ✅ | ✅ | ✅ | -| `exists` | ✅ | ✅ | ✅ | -| `glob` | ✅ | ✅ | ✅ | -| `is_absolute` | ✅ | ✅ | ✅ | -| `is_dir` | ✅ | ✅ | ✅ | -| `is_file` | ✅ | ✅ | ✅ | -| `is_relative_to` | ✅ | ✅ | ✅ | -| `iterdir` | ✅ | ✅ | ✅ | -| `joinpath` | ✅ | ✅ | ✅ | -| `match` | ✅ | ✅ | ✅ | -| `mkdir` | ✅ | ✅ | ✅ | -| `name` | ✅ | ✅ | ✅ | -| `open` | ✅ | ✅ | ✅ | -| `parent` | ✅ | ✅ | ✅ | -| `parents` | ✅ | ✅ | ✅ | -| `parts` | ✅ | ✅ | ✅ | -| `read_bytes` | ✅ | ✅ | ✅ | -| `read_text` | ✅ | ✅ | ✅ | -| `relative_to` | ✅ | ✅ | ✅ | -| `rename` | ✅ | ✅ | ✅ | -| `replace` | ✅ | ✅ | ✅ | -| `resolve` | ✅ | ✅ | ✅ | -| `rglob` | ✅ | ✅ | ✅ | -| `rmdir` | ✅ | ✅ | ✅ | -| `samefile` | ✅ | ✅ | ✅ | -| `stat` | ✅ | ✅ | ✅ | -| `stem` | ✅ | ✅ | ✅ | -| `suffix` | ✅ | ✅ | ✅ | -| `suffixes` | ✅ | ✅ | ✅ | -| `touch` | ✅ | ✅ | ✅ | -| `unlink` | ✅ | ✅ | ✅ | -| `with_name` | ✅ | ✅ | ✅ | -| `with_stem` | ✅ | ✅ | ✅ | -| `with_suffix` | ✅ | ✅ | ✅ | -| `write_bytes` | ✅ | ✅ | ✅ | -| `write_text` | ✅ | ✅ | ✅ | -| `as_posix` | ❌ | ❌ | ❌ | -| `chmod` | ❌ | ❌ | ❌ | -| `cwd` | ❌ | ❌ | ❌ | -| `expanduser` | ❌ | ❌ | ❌ | -| `group` | ❌ | ❌ | ❌ | -| `hardlink_to` | ❌ | ❌ | ❌ | -| `home` | ❌ | ❌ | ❌ | -| `is_block_device` | ❌ | ❌ | ❌ | -| `is_char_device` | ❌ | ❌ | ❌ | -| `is_fifo` | ❌ | ❌ | ❌ | -| `is_mount` | ❌ | ❌ | ❌ | -| `is_reserved` | ❌ | ❌ | ❌ | -| `is_socket` | ❌ | ❌ | ❌ | -| `is_symlink` | ❌ | ❌ | ❌ | -| `lchmod` | ❌ | ❌ | ❌ | -| `link_to` | ❌ | ❌ | ❌ | -| `lstat` | ❌ | ❌ | ❌ | -| `owner` | ❌ | ❌ | ❌ | -| `readlink` | ❌ | ❌ | ❌ | -| `root` | ❌ | ❌ | ❌ | -| `symlink_to` | ❌ | ❌ | ❌ | -| `as_url` | ✅ | ✅ | ✅ | -| `clear_cache` | ✅ | ✅ | ✅ | -| `cloud_prefix` | ✅ | ✅ | ✅ | -| `copy` | ✅ | ✅ | ✅ | -| `copytree` | ✅ | ✅ | ✅ | -| `download_to` | ✅ | ✅ | ✅ | -| `etag` | ✅ | ✅ | ✅ | -| `fspath` | ✅ | ✅ | ✅ 
| -| `is_junction` | ✅ | ✅ | ✅ | -| `is_valid_cloudpath` | ✅ | ✅ | ✅ | -| `rmtree` | ✅ | ✅ | ✅ | -| `upload_from` | ✅ | ✅ | ✅ | -| `validate` | ✅ | ✅ | ✅ | -| `walk` | ✅ | ✅ | ✅ | -| `with_segments` | ✅ | ✅ | ✅ | -| `blob` | ✅ | ❌ | ✅ | -| `bucket` | ❌ | ✅ | ✅ | -| `container` | ✅ | ❌ | ❌ | -| `key` | ❌ | ✅ | ❌ | -| `md5` | ✅ | ❌ | ✅ | +| Methods + properties | `AzureBlobPath` | `GSPath` | `HttpsPath` | `S3Path` | +|:-----------------------|:------------------|:-----------|:--------------|:-----------| +| `absolute` | ✅ | ✅ | ✅ | ✅ | +| `anchor` | ✅ | ✅ | ✅ | ✅ | +| `as_uri` | ✅ | ✅ | ✅ | ✅ | +| `drive` | ✅ | ✅ | ✅ | ✅ | +| `exists` | ✅ | ✅ | ✅ | ✅ | +| `glob` | ✅ | ✅ | ✅ | ✅ | +| `is_absolute` | ✅ | ✅ | ✅ | ✅ | +| `is_dir` | ✅ | ✅ | ✅ | ✅ | +| `is_file` | ✅ | ✅ | ✅ | ✅ | +| `is_junction` | ✅ | ✅ | ✅ | ✅ | +| `is_relative_to` | ✅ | ✅ | ✅ | ✅ | +| `iterdir` | ✅ | ✅ | ✅ | ✅ | +| `joinpath` | ✅ | ✅ | ✅ | ✅ | +| `match` | ✅ | ✅ | ✅ | ✅ | +| `mkdir` | ✅ | ✅ | ✅ | ✅ | +| `name` | ✅ | ✅ | ✅ | ✅ | +| `open` | ✅ | ✅ | ✅ | ✅ | +| `parent` | ✅ | ✅ | ✅ | ✅ | +| `parents` | ✅ | ✅ | ✅ | ✅ | +| `parts` | ✅ | ✅ | ✅ | ✅ | +| `read_bytes` | ✅ | ✅ | ✅ | ✅ | +| `read_text` | ✅ | ✅ | ✅ | ✅ | +| `relative_to` | ✅ | ✅ | ✅ | ✅ | +| `rename` | ✅ | ✅ | ✅ | ✅ | +| `replace` | ✅ | ✅ | ✅ | ✅ | +| `resolve` | ✅ | ✅ | ✅ | ✅ | +| `rglob` | ✅ | ✅ | ✅ | ✅ | +| `rmdir` | ✅ | ✅ | ✅ | ✅ | +| `samefile` | ✅ | ✅ | ✅ | ✅ | +| `stat` | ✅ | ✅ | ✅ | ✅ | +| `stem` | ✅ | ✅ | ✅ | ✅ | +| `suffix` | ✅ | ✅ | ✅ | ✅ | +| `suffixes` | ✅ | ✅ | ✅ | ✅ | +| `touch` | ✅ | ✅ | ✅ | ✅ | +| `unlink` | ✅ | ✅ | ✅ | ✅ | +| `walk` | ✅ | ✅ | ✅ | ✅ | +| `with_name` | ✅ | ✅ | ✅ | ✅ | +| `with_segments` | ✅ | ✅ | ✅ | ✅ | +| `with_stem` | ✅ | ✅ | ✅ | ✅ | +| `with_suffix` | ✅ | ✅ | ✅ | ✅ | +| `write_bytes` | ✅ | ✅ | ✅ | ✅ | +| `write_text` | ✅ | ✅ | ✅ | ✅ | +| `as_posix` | ❌ | ❌ | ❌ | ❌ | +| `chmod` | ❌ | ❌ | ❌ | ❌ | +| `cwd` | ❌ | ❌ | ❌ | ❌ | +| `expanduser` | ❌ | ❌ | ❌ | ❌ | +| `group` | ❌ | ❌ | ❌ | ❌ | +| `hardlink_to` | ❌ | ❌ | ❌ 
| ❌ | +| `home` | ❌ | ❌ | ❌ | ❌ | +| `is_block_device` | ❌ | ❌ | ❌ | ❌ | +| `is_char_device` | ❌ | ❌ | ❌ | ❌ | +| `is_fifo` | ❌ | ❌ | ❌ | ❌ | +| `is_mount` | ❌ | ❌ | ❌ | ❌ | +| `is_reserved` | ❌ | ❌ | ❌ | ❌ | +| `is_socket` | ❌ | ❌ | ❌ | ❌ | +| `is_symlink` | ❌ | ❌ | ❌ | ❌ | +| `lchmod` | ❌ | ❌ | ❌ | ❌ | +| `lstat` | ❌ | ❌ | ❌ | ❌ | +| `owner` | ❌ | ❌ | ❌ | ❌ | +| `readlink` | ❌ | ❌ | ❌ | ❌ | +| `root` | ❌ | ❌ | ❌ | ❌ | +| `symlink_to` | ❌ | ❌ | ❌ | ❌ | +| `as_url` | ✅ | ✅ | ✅ | ✅ | +| `clear_cache` | ✅ | ✅ | ✅ | ✅ | +| `client` | ✅ | ✅ | ✅ | ✅ | +| `cloud_prefix` | ✅ | ✅ | ✅ | ✅ | +| `copy` | ✅ | ✅ | ✅ | ✅ | +| `copytree` | ✅ | ✅ | ✅ | ✅ | +| `download_to` | ✅ | ✅ | ✅ | ✅ | +| `from_uri` | ✅ | ✅ | ✅ | ✅ | +| `fspath` | ✅ | ✅ | ✅ | ✅ | +| `full_match` | ✅ | ✅ | ✅ | ✅ | +| `is_valid_cloudpath` | ✅ | ✅ | ✅ | ✅ | +| `parser` | ✅ | ✅ | ✅ | ✅ | +| `rmtree` | ✅ | ✅ | ✅ | ✅ | +| `upload_from` | ✅ | ✅ | ✅ | ✅ | +| `validate` | ✅ | ✅ | ✅ | ✅ | +| `etag` | ✅ | ✅ | ❌ | ✅ | +| `blob` | ✅ | ✅ | ❌ | ❌ | +| `bucket` | ❌ | ✅ | ❌ | ✅ | +| `md5` | ✅ | ✅ | ❌ | ❌ | +| `container` | ✅ | ❌ | ❌ | ❌ | +| `delete` | ❌ | ❌ | ✅ | ❌ | +| `get` | ❌ | ❌ | ✅ | ❌ | +| `head` | ❌ | ❌ | ✅ | ❌ | +| `key` | ❌ | ❌ | ❌ | ✅ | +| `parsed_url` | ❌ | ❌ | ✅ | ❌ | +| `post` | ❌ | ❌ | ✅ | ❌ | +| `put` | ❌ | ❌ | ✅ | ❌ | ---- diff --git a/docs/make_support_table.py b/docs/make_support_table.py index ad06142a..47f7961d 100644 --- a/docs/make_support_table.py +++ b/docs/make_support_table.py @@ -12,6 +12,7 @@ def print_table(): lib_methods = { v.path_class.__name__: {m for m in dir(v.path_class) if not m.startswith("_")} for k, v in cloudpathlib.cloudpath.implementation_registry.items() + if not k in ["http"] # just list https in table since they are the same } all_methods = copy(path_base) From dff738c1a59330f73f6a7aac95744afc07dbcd0a Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 19 Feb 2025 14:41:16 -1000 Subject: [PATCH 22/26] lint --- docs/make_support_table.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/docs/make_support_table.py b/docs/make_support_table.py index 47f7961d..eb3a34f2 100644 --- a/docs/make_support_table.py +++ b/docs/make_support_table.py @@ -12,7 +12,7 @@ def print_table(): lib_methods = { v.path_class.__name__: {m for m in dir(v.path_class) if not m.startswith("_")} for k, v in cloudpathlib.cloudpath.implementation_registry.items() - if not k in ["http"] # just list https in table since they are the same + if k not in ["http"] # just list https in table since they are the same } all_methods = copy(path_base) From 204a04930fbd6784071f7baaeb2bb5ad76089e2c Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Thu, 20 Feb 2025 13:07:47 -1000 Subject: [PATCH 23/26] try skipping http rigs on windows in CI --- tests/conftest.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9ae78ae7..733727ce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ import os from pathlib import Path, PurePosixPath +import platform import shutil import ssl from tempfile import TemporaryDirectory @@ -557,17 +558,30 @@ def https_rig(request, assets_dir, https_server): # noqa: F811 ], ) +main_rigs = [ + azure_rig, # azure_rig0 + azure_gen2_rig, # azure_rig1 + gs_rig, + s3_rig, + custom_s3_rig, + local_azure_rig, + local_s3_rig, + local_gs_rig, +] + +# add http rigs if not on Windows in CI, which is slow +if not os.getenv("CI") or platform.system() != "Windows": + main_rigs.extend( + [ + http_rig, + https_rig, + ] + ) + + rig = fixture_union( "rig", [ - azure_rig, # azure_rig0 - azure_gen2_rig, # azure_rig1 - gs_rig, - s3_rig, - custom_s3_rig, - local_azure_rig, - local_s3_rig, - local_gs_rig, http_rig, https_rig, ], From 262c574d1d9bc742aa946576bd792b607971f0fb Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 26 Feb 2025 17:52:31 -0800 Subject: [PATCH 24/26] more stable tests --- cloudpathlib/__init__.py | 6 +++-- tests/conftest.py | 8 +++++-- 
tests/http_fixtures.py | 51 ++++++++++++++++++++++++++++++++-------- tests/utils.py | 15 ++++++++++++ 4 files changed, 66 insertions(+), 14 deletions(-) create mode 100644 tests/utils.py diff --git a/cloudpathlib/__init__.py b/cloudpathlib/__init__.py index c51fc45e..84ed31b2 100644 --- a/cloudpathlib/__init__.py +++ b/cloudpathlib/__init__.py @@ -6,8 +6,8 @@ from .cloudpath import CloudPath, implementation_registry from .gs.gsclient import GSClient from .gs.gspath import GSPath -from .http.httpclient import HttpClient -from .http.httppath import HttpPath +from .http.httpclient import HttpClient, HttpsClient +from .http.httppath import HttpPath, HttpsPath from .s3.s3client import S3Client from .s3.s3path import S3Path @@ -30,7 +30,9 @@ "GSClient", "GSPath", "HttpClient", + "HttpsClient", "HttpPath", + "HttpsPath", "S3Client", "S3Path", ] diff --git a/tests/conftest.py b/tests/conftest.py index 733727ce..4d0c66f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,7 @@ import cloudpathlib.azure.azblobclient from cloudpathlib.azure.azblobclient import _hns_rmtree import cloudpathlib.s3.s3client +from .http_fixtures import http_server, https_server, utilities_dir # noqa: F401 from .mock_clients.mock_azureblob import MockBlobServiceClient, DEFAULT_CONTAINER_NAME from .mock_clients.mock_adls_gen2 import MockedDataLakeServiceClient from .mock_clients.mock_gs import ( @@ -46,10 +47,9 @@ MockTransferManager, ) from .mock_clients.mock_s3 import mocked_session_class_factory, DEFAULT_S3_BUCKET_NAME +from .utils import _sync_filesystem -from .http_fixtures import http_server, https_server, utilities_dir # noqa: F401 - if os.getenv("USE_LIVE_CLOUD") == "1": load_dotenv(find_dotenv()) @@ -499,6 +499,7 @@ def http_rig(request, assets_dir, http_server): # noqa: F811 # copy test assets shutil.copytree(assets_dir, server_dir / test_dir) + _sync_filesystem() rig = CloudProviderTestRig( path_class=HttpPath, @@ -514,6 +515,7 @@ def http_rig(request, assets_dir, 
http_server): # noqa: F811 rig.client_class._default_client = None # reset default client shutil.rmtree(server_dir) + _sync_filesystem() @fixture() @@ -525,6 +527,7 @@ def https_rig(request, assets_dir, https_server): # noqa: F811 # copy test assets shutil.copytree(assets_dir, server_dir / test_dir) + _sync_filesystem() skip_verify_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) skip_verify_ctx.check_hostname = False @@ -547,6 +550,7 @@ def https_rig(request, assets_dir, https_server): # noqa: F811 rig.client_class._default_client = None # reset default client shutil.rmtree(server_dir) + _sync_filesystem() # create azure fixtures for both blob and gen2 storage diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index fce2ead5..cb3339d3 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -11,6 +11,7 @@ from urllib.request import urlopen from pytest import fixture +from tenacity import retry, stop_after_attempt, wait_fixed utilities_dir = Path(__file__).parent / "utilities" @@ -19,6 +20,7 @@ class TestHTTPRequestHandler(SimpleHTTPRequestHandler): """Also allows PUT and DELETE requests for testing.""" + @retry(stop=stop_after_attempt(5), wait=wait_fixed(0.1)) def do_PUT(self): length = int(self.headers["Content-Length"]) path = Path(self.translate_path(self.path)) @@ -31,12 +33,18 @@ def do_PUT(self): with path.open("wb") as f: f.write(self.rfile.read(length)) + # Ensure the file is flushed and synced to disk before returning + # The perf hit is ok here since this is a test server + f.flush() + os.fsync(f.fileno()) + now = datetime.now().timestamp() os.utime(path, (now, now)) self.send_response(201) self.end_headers() + @retry(stop=stop_after_attempt(5), wait=wait_fixed(0.1)) def do_DELETE(self): path = Path(self.translate_path(self.path)) @@ -51,6 +59,7 @@ def do_DELETE(self): self.end_headers() + @retry(stop=stop_after_attempt(5), wait=wait_fixed(0.1)) def do_POST(self): # roundtrip any posted JSON data for testing content_length = 
int(self.headers["Content-Length"]) @@ -61,6 +70,14 @@ def do_POST(self): self.end_headers() self.wfile.write(post_data) + @retry(stop=stop_after_attempt(5), wait=wait_fixed(0.1)) + def do_GET(self): + super().do_GET() + + @retry(stop=stop_after_attempt(5), wait=wait_fixed(0.1)) + def do_HEAD(self): + super().do_HEAD() + def _http_server( root_dir, port, hostname="localhost", use_ssl=False, certfile=None, keyfile=None, threaded=True @@ -71,7 +88,16 @@ def _http_server( def start_server(): handler = partial(TestHTTPRequestHandler, directory=str(root_dir)) - httpd = HTTPServer((hostname, port), handler) + + try: + httpd = HTTPServer((hostname, port), handler) + except OSError as e: + if e.errno == 48: + httpd = HTTPServer( + (hostname, port + random.randint(0, 10000)), handler + ) # somtimes the same worker collides before port is released; retry + else: + raise e if use_ssl: if not certfile or not keyfile: @@ -87,12 +113,13 @@ def start_server(): if threaded: server_thread = threading.Thread(target=start_server, daemon=True) server_thread.start() - else: start_server() - # Wait for the server to start - for _ in range(10): + max_attempts = 100 + wait_time = 0.2 + + for attempt in range(max_attempts): try: if use_ssl: req_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) @@ -101,11 +128,15 @@ def start_server(): else: req_context = None - urlopen(f"{scheme}://{hostname}:{port}", context=req_context) - - break + with urlopen( + f"{scheme}://{hostname}:{port}", context=req_context, timeout=1.0 + ) as response: + if response.status == 200: + break except Exception: - time.sleep(0.1) + if attempt == max_attempts - 1: + raise RuntimeError(f"Server failed to start after {max_attempts} attempts") + time.sleep(wait_time) return f"{scheme}://{hostname}:{port}", server_thread @@ -114,7 +145,7 @@ def start_server(): def http_server(tmp_path_factory, worker_id): port = ( 9077 - + random.randint(0, 1000) + + random.randint(0, 10000) + (int(worker_id.lstrip("gw")) if worker_id != 
"master" else 0) ) # don't collide if tests running in parallel with multiple servers @@ -134,7 +165,7 @@ def http_server(tmp_path_factory, worker_id): def https_server(tmp_path_factory, worker_id): port = ( 4443 - + random.randint(0, 1000) + + random.randint(0, 10000) + (int(worker_id.lstrip("gw")) if worker_id != "master" else 0) ) # don't collide if tests running in parallel with multiple servers diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000..ffc5d746 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,15 @@ +import platform +import os +import time + + +def _sync_filesystem(): + """Try to force sync of the filesystem to stabilize tests. + + On Windows, give the filesystem a moment to catch up since sync is not available. + """ + if platform.system() != "Windows": + os.sync() + else: + # On Windows, give the filesystem a moment to catch up + time.sleep(0.5) From c42dc90afccc5db436fc036aa64399cbb45b542a Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 26 Feb 2025 20:02:30 -0800 Subject: [PATCH 25/26] test flakiness --- tests/conftest.py | 24 ++++++++++++++++++++++++ tests/http_fixtures.py | 3 +++ tests/test_caching.py | 4 +++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4d0c66f6..12886abc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,10 @@ +from functools import wraps import os from pathlib import Path, PurePosixPath import platform import shutil import ssl +import time from tempfile import TemporaryDirectory from typing import Dict, Optional from urllib.parse import urlparse @@ -123,6 +125,28 @@ def create_test_dir_name(request) -> str: return test_dir +@fixture +def wait_for_mkdir(monkeypatch): + """Fixture that patches os.mkdir to wait for directory creation for tests that sometimes are flaky.""" + original_mkdir = os.mkdir + + @wraps(original_mkdir) + def wrapped_mkdir(path, *args, **kwargs): + result = original_mkdir(path, *args, **kwargs) + 
_sync_filesystem() + + start = time.time() + + while not os.path.exists(path) and time.time() - start < 5: + time.sleep(0.01) + _sync_filesystem() + + assert os.path.exists(path), f"Directory {path} was not created" + return result + + monkeypatch.setattr(os, "mkdir", wrapped_mkdir) + + def _azure_fixture(conn_str_env_var, adls_gen2, request, monkeypatch, assets_dir): drive = os.getenv("LIVE_AZURE_CONTAINER", DEFAULT_CONTAINER_NAME) test_dir = create_test_dir_name(request) diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index cb3339d3..f8bfa9e1 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -13,6 +13,7 @@ from pytest import fixture from tenacity import retry, stop_after_attempt, wait_fixed +from .utils import _sync_filesystem utilities_dir = Path(__file__).parent / "utilities" @@ -30,6 +31,8 @@ def do_PUT(self): else: path.parent.mkdir(parents=True, exist_ok=True) + _sync_filesystem() + with path.open("wb") as f: f.write(self.rfile.read(length)) diff --git a/tests/test_caching.py b/tests/test_caching.py index 5bfbbdc2..71cde42e 100644 --- a/tests/test_caching.py +++ b/tests/test_caching.py @@ -19,6 +19,7 @@ OverwriteNewerLocalError, ) from tests.conftest import CloudProviderTestRig +from tests.utils import _sync_filesystem def test_defaults_work_as_expected(rig: CloudProviderTestRig): @@ -189,7 +190,7 @@ def test_persistent_mode(rig: CloudProviderTestRig, tmpdir): assert client_cache_dir.exists() -def test_loc_dir(rig: CloudProviderTestRig, tmpdir): +def test_loc_dir(rig: CloudProviderTestRig, tmpdir, wait_for_mkdir): """Tests that local cache dir is used when specified and works' with the different cache modes. 
@@ -250,6 +251,7 @@ def test_loc_dir(rig: CloudProviderTestRig, tmpdir): assert cp.client.file_cache_mode == FileCacheMode.tmp_dir # download from cloud into the cache + _sync_filesystem() with cp.open("r") as f: _ = f.read() From a176ad5232e35aae2ea9ac5911ddddacd78de1e0 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 5 Mar 2025 21:31:30 -0700 Subject: [PATCH 26/26] refresh cert --- tests/http_fixtures.py | 13 ++++---- tests/utilities/insecure-test.crt | 34 ++++++++++---------- tests/utilities/insecure-test.key | 52 +++++++++++++++---------------- tests/utilities/insecure-test.pem | 34 ++++++++++---------- 4 files changed, 67 insertions(+), 66 deletions(-) diff --git a/tests/http_fixtures.py b/tests/http_fixtures.py index f8bfa9e1..cb6c0990 100644 --- a/tests/http_fixtures.py +++ b/tests/http_fixtures.py @@ -148,7 +148,7 @@ def start_server(): def http_server(tmp_path_factory, worker_id): port = ( 9077 - + random.randint(0, 10000) + + random.randint(0, 50000) + (int(worker_id.lstrip("gw")) if worker_id != "master" else 0) ) # don't collide if tests running in parallel with multiple servers @@ -168,7 +168,7 @@ def http_server(tmp_path_factory, worker_id): def https_server(tmp_path_factory, worker_id): port = ( 4443 - + random.randint(0, 10000) + + random.randint(0, 50000) + (int(worker_id.lstrip("gw")) if worker_id != "master" else 0) ) # don't collide if tests running in parallel with multiple servers @@ -176,10 +176,11 @@ def https_server(tmp_path_factory, worker_id): # Command for generating self-signed localhost cert # openssl req -x509 -out localhost.crt -keyout localhost.key \ - # -newkey rsa:2048 -nodes -sha256 \ - # -subj '/CN=localhost' -extensions EXT -config <( \ - # printf "[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\nsubjectAltName=DNS:localhost\nkeyUsage=digitalSignature\nextendedKeyUsage=serverAuth") - # + # -newkey rsa:2048 -nodes -sha256 -days 99999 \ + # -subj '/CN=localhost' \ + # -extensions EXT -config <( \ + # printf 
"[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\nsubjectAltName=DNS:localhost\nkeyUsage=digitalSignature\nextendedKeyUsage=serverAuth" + # ) # openssl x509 -in localhost.crt -out localhost.pem -outform PEM host, server_thread = _http_server( diff --git a/tests/utilities/insecure-test.crt b/tests/utilities/insecure-test.crt index d67dbdcc..c695d787 100644 --- a/tests/utilities/insecure-test.crt +++ b/tests/utilities/insecure-test.crt @@ -1,19 +1,19 @@ -----BEGIN CERTIFICATE----- -MIIDDzCCAfegAwIBAgIUBUa66W9WhdTxm3BiIlfjGN4HjLkwDQYJKoZIhvcNAQEL -BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTI1MDIwMzAyMzIwOFoXDTI1MDMw -NTAyMzIwOFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF -AAOCAQ8AMIIBCgKCAQEAslbXVimtI9/ndG2vPKt7VXKF6cWegxH9Jkjn+2O1G8ZQ -8cQKLlS7zfqLN7VXVOIVtun1/ZK/saW7Ce2usB21VPDKAWGLtAZiIb5lhT25sljU -u+OLlaSqMD4rMdYgr6rfNnszDQfZ/9J4Jt5WZU3GZj0UguXHfUg/fVprRYmM4zxy -9kGxuERErDz3mISkLKkA2Xgo+1Y4aCTTmLCZYF5F1DitC39dB6efd98Q+Gz7eI1R -fzlGYaEq4ISrIy1h8UnLQmVoNfxTSbuZJp9kmTHqefynxWQQDlfLJ4yeLB4rOMlQ -ZOE/829Pc0nFZSLoeha2f9S6CQ1nFIhGNwQuJYOhTwIDAQABo1kwVzAUBgNVHREE -DTALgglsb2NhbGhvc3QwCwYDVR0PBAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMB -MB0GA1UdDgQWBBTD07Fp8N4xue7MMVrlI77W8Hpi1zANBgkqhkiG9w0BAQsFAAOC -AQEAjl4O2sDgEoe26OaxA8ICwemoaQh/uobVSwWejtNgrId8V8g8HQFNt3M3qbPk -mi8ymwmbs9NuiDK8DpWrIiXIT/22DUFJ/mfNlRB5FBK0ACwWfLs+lDrqD+Xdz/FP -ZT07xFgByrSXkBz3Av1Zrm8PzjLAuA7Ar/XYnkdwwmlNujFx+W92gSkGUduPE51Y -qCO0pFoG6TVhMv6kIQqqefblDv32yiMB+Yki/xYiR/6kRfOim8oXbwsZZvI1/QI6 -76Pyw57+2ZO9hREZRRuhtz0Ku0BFOeFI2/K2D+RDGQZ8mEfO6z/OQ3D1lB+apYDa -1uhrG0/cbVccEQDqfjiliHJuEQ== +MIIDETCCAfmgAwIBAgIUJ5AHy/8xFT6iFC5yf8RN+Zh9u8EwDQYJKoZIhvcNAQEL +BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MCAXDTI1MDMwNjA0MjYxMloYDzIyOTgx +MjE5MDQyNjEyWjAUMRIwEAYDVQQDDAlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEB +AQUAA4IBDwAwggEKAoIBAQC4MdJvJ9PDZR8qrKAEPH5aDNaj5hNjMbe4EkCC5gCy +6gWQd13nZ3IIELwIlXmZE0Rll0KRtiWAPQ53C1NGw3ipXHOFRd3nnw9pCf2pz+Cf +uXH+2YaLPP1rkRaFFKHUpbhjTWkazTPOFhgInMcI6vNEhdmIDMbj68Zp8i+ufac+ 
+OuqamuJXcuo2wKBAtlKnv1DhWyBjJ8f/aZxmXG2iVT5kuC1q+h+WxEn2fUxUKSj0 +1+EYlgAuIbv2E4OtydhHoFMA8PFWddd6Lhw8lUyP0DooBMdG7Rfa3vJ4+tFQHB0h +v79TtC97L/wJIV31T4r+HCNO8Jzg2Rx9F3vDCsULFB4dAgMBAAGjWTBXMBQGA1Ud +EQQNMAuCCWxvY2FsaG9zdDALBgNVHQ8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUH +AwEwHQYDVR0OBBYEFIX9dStJ5+p7TjWc3hfuwRGLPx4sMA0GCSqGSIb3DQEBCwUA +A4IBAQAKU7Alei1o3qVFouuoiJTENS5Af9878mNwyh7fhY++FeQ4UUvKeHxDrpPq +sG00Lep78OPwr4ZWKtPCMAhgbmkfS464qqaUGFdrdylBcYiKSgF0zsaXvVrxYEUG +vBfk8KGlKPgLmvMlBFZbOFIJHPPjSi2yzMzYaq7ujLQOiErs0siafy6piQXhpsZQ +bCI91bJbIXFlTjTeoYXT3wBz+2/NgbNLCo4YVfb4Ua8VVuSb+G+zUxzNnaPocQyA +QvmKwxnOUROow1CuSVge1BtvMDMkHd45NCgeYOFiem5S/0g5/N+qNsRHl0gWZOXr +a4NZtQ/cM9MUV9leNI+2baNTLdxh -----END CERTIFICATE----- diff --git a/tests/utilities/insecure-test.key b/tests/utilities/insecure-test.key index 86fd4766..e298e24f 100644 --- a/tests/utilities/insecure-test.key +++ b/tests/utilities/insecure-test.key @@ -1,28 +1,28 @@ -----BEGIN PRIVATE KEY----- -MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCyVtdWKa0j3+d0 -ba88q3tVcoXpxZ6DEf0mSOf7Y7UbxlDxxAouVLvN+os3tVdU4hW26fX9kr+xpbsJ -7a6wHbVU8MoBYYu0BmIhvmWFPbmyWNS744uVpKowPisx1iCvqt82ezMNB9n/0ngm -3lZlTcZmPRSC5cd9SD99WmtFiYzjPHL2QbG4RESsPPeYhKQsqQDZeCj7VjhoJNOY -sJlgXkXUOK0Lf10Hp5933xD4bPt4jVF/OUZhoSrghKsjLWHxSctCZWg1/FNJu5km -n2SZMep5/KfFZBAOV8snjJ4sHis4yVBk4T/zb09zScVlIuh6FrZ/1LoJDWcUiEY3 -BC4lg6FPAgMBAAECggEAF++H30ygrFv02K+QAXiSiRlh18pqR/U2INlQKXFscVng -q1JAe49r1W13GfcAzae6el5UYcA8cj9oJyD7fS+/krLuqdjJw1PH//dp8MQkFEfP -5ZfYuDDtlqQBcCLpKCqq4skORQV0wN0iuzTEK7yQkFBKXHcymyIDQ3PLSf34JtdQ -NlPrgg4k22h6YpFxZqyKkjGfVLZ3Xy34zhwoMO+9c+QpLWpUmK5h942tRbmJcL6s -TEKbBBX7lNrkcruMSW4Pa3hxtaVIkgCMzl3C5ESbGnKwyZoSXYSYLz7ZJ31t419D -n8pbd5QnT5I6kHNS1zjfDulK/atjA0MRfBzo69WToQKBgQD2YmLRzZ7+wEhR51/f -bjtegd91cBE/oc5QI1/peOzqCbE1XK3AXkl7MdnHgmvDnB/am9in5er6dP+zEk70 -zkXi8d4CCGr3rOTSNL9J/jYRT6qzrJ3W93LZyF8SLkdd+dlArm6WfmpZAd7NDjPY -rPaFWRmmZw2nelGtJ7VsF4JrmwKBgQC5TJ6s/C0ptCc8eqpqZrXGs3GKf1e2XpJa 
-IMTNypLRJlW5/pQV4UcDnttaW9h/KHG90SHGc8QrNw9ZsSyv0KP0nDQggAUgugbk -UDbqACoVfyK16/y09/UxDmm4+gneVfRl43GE9l6E8eWcaLDMq9bIf8d6V6fzVKbr -CncCD+k+XQKBgBcC/2nLA6OVf+uuaYTrvb3I8X79dY705NkVRLsAoVgGibeLLWs/ -IyvPbyC0LK57YXpAfg7IaZb04IDwH9G7hkl4/5w+/wac7k43wPn/NTuOrknTwHY4 -bFCs5zxCeEXttP3WjNufeP1RMGcEgZEmLmwr8zpYe6z9vq0mfUpYM8UrAoGADNyh -ovY6GEv18T++dnY8pdcsyWU/bFwyImll37bw6+2SQ7/E/aPObxTmuuXnKCRhiMBs -bL76XdmLijk/FdEQoFj7UUoxkOz4r35V4XYzdZyGh0QjHVicv1P1yK4qYzFqLmxU -I9uKAgZcXrfxlbNTzET6Tqao9L3qfCQU0KLUGM0CgYEAigDUTS/X8e+pzGAaHKrf -o5GrMEAb9yxYb9R+LVunTUKCil6fj6XeF05JeN+VwoY4KdBVRi0JF3zGPFtUuvkv -o1fsiY5jzQh3Q6nQRju1mthOsEQeii8n9xO4VxIPHxFFZpfTtIkgh1aC9GMkLyld -OsW4jP0Sd5izcqEuBtcZv4g= +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQC4MdJvJ9PDZR8q +rKAEPH5aDNaj5hNjMbe4EkCC5gCy6gWQd13nZ3IIELwIlXmZE0Rll0KRtiWAPQ53 +C1NGw3ipXHOFRd3nnw9pCf2pz+CfuXH+2YaLPP1rkRaFFKHUpbhjTWkazTPOFhgI +nMcI6vNEhdmIDMbj68Zp8i+ufac+OuqamuJXcuo2wKBAtlKnv1DhWyBjJ8f/aZxm +XG2iVT5kuC1q+h+WxEn2fUxUKSj01+EYlgAuIbv2E4OtydhHoFMA8PFWddd6Lhw8 +lUyP0DooBMdG7Rfa3vJ4+tFQHB0hv79TtC97L/wJIV31T4r+HCNO8Jzg2Rx9F3vD +CsULFB4dAgMBAAECggEAB+MF1ptLz2bd5YSHan8FNJZhpDPWoMckQNlMa75s77w8 +Mw7ryKi1HE0yu++O8BcmdQiIjp3DVjRNd85WliXwnYtwz9dP9KbqvXQZAv0jA7vO +4Rs2NItbpenMHzRYGvdiLiOGcu6v5ezFjsYMJXpBiQc6xMvlQguadV1fHZdrdcTa +spy5qoUAu+6tSYmUWGO6WUz77iRvSN9jX3Zrm79pghoh8FMU+e74XGK+KmOjxptr +weqmjLZ3IjxNMeM1oT3sZJXitKZwo0Nn/O0IaFf43ED1Tl4xqVbF191Cg6q9Lq6k +kWO0vK+SBvB2Tuc/uaxYwXxHDZlZMQJWuGqdm185aQKBgQDp2eF+mqLkTwOkSz64 +9oFpp3xyLcDgW+HRP5pQ8VdD8QfrcquF8nkik1qqGtqV3L6C6coV0I7rT10G1hVa +gfy3S40XQNYVzX00uen3SkLAZqWtZEmKZSX/ujJzitosnGopj6DDCp8x0uFSBCCQ +k4e8t+wAafK45LsBeTVd0Rx2WQKBgQDJo+9d4r+D1oW/kaz6xDvAVTZl7RRXQqhf +HnBYzfbcq05+a/ItQuKOgtUKF+YP7AXBz0emwCKTyvWKtKn1GS+CEEFa0WBq/BQP +ZaN4gZP9Ks6JE49bRbALxTNHT/E/jfB8ZaAJJvnGLJcQnJGWtNni+NkcLUwCTd24 ++IhFsAE1ZQKBgHRlSmOlvGY/HeTsvu95Yrif8MU0GMiTczafncOkHHdoxTRgAxfM +tJaNGZeJfjsRgJHQpBFvmXNuMTNx43cFNDBXnoZwcx6vBR71WosHn2n8rYI8ZI5K 
+AjvjBbGU7zs9L2o4XyA1bT9zY0tbWoobpHg+Y21hTrrQvnPhTLrBlpUhAoGAfxW0 +co7Lt7MH/vI4fKAbLKBINbtTe5/GcJTFqCK/uNq7ERDFxOMH2Vv6G29uj5BpHHqg +pScpY86iLPaqu5BREf9ewXd+9k7D8eRUQEoi1IgbPee3DYaBaxmxtPtciHrUHAMM +91e6Sr2lxFF5rOt0/AltBMZDxpv+52q0sH86c8UCgYAMIptuidG15v/yiI5jT9md +7fX3gRqX2y9RytLVjhbx6Qgu7ngZ0m/7ASyrpTBI6KX7ipOXz6QrcKyxF5YxC8jm +Svh9y/HFFuXVVrAV6fw64RiXXfnr1RfzuLx6w5TlNgbSbElqyQ1gRhTnMpURvv64 +iP0Pxge/eazpbF5BB7eAVA== -----END PRIVATE KEY----- diff --git a/tests/utilities/insecure-test.pem b/tests/utilities/insecure-test.pem index d67dbdcc..c695d787 100644 --- a/tests/utilities/insecure-test.pem +++ b/tests/utilities/insecure-test.pem @@ -1,19 +1,19 @@ -----BEGIN CERTIFICATE----- -MIIDDzCCAfegAwIBAgIUBUa66W9WhdTxm3BiIlfjGN4HjLkwDQYJKoZIhvcNAQEL -BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTI1MDIwMzAyMzIwOFoXDTI1MDMw -NTAyMzIwOFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF -AAOCAQ8AMIIBCgKCAQEAslbXVimtI9/ndG2vPKt7VXKF6cWegxH9Jkjn+2O1G8ZQ -8cQKLlS7zfqLN7VXVOIVtun1/ZK/saW7Ce2usB21VPDKAWGLtAZiIb5lhT25sljU -u+OLlaSqMD4rMdYgr6rfNnszDQfZ/9J4Jt5WZU3GZj0UguXHfUg/fVprRYmM4zxy -9kGxuERErDz3mISkLKkA2Xgo+1Y4aCTTmLCZYF5F1DitC39dB6efd98Q+Gz7eI1R -fzlGYaEq4ISrIy1h8UnLQmVoNfxTSbuZJp9kmTHqefynxWQQDlfLJ4yeLB4rOMlQ -ZOE/829Pc0nFZSLoeha2f9S6CQ1nFIhGNwQuJYOhTwIDAQABo1kwVzAUBgNVHREE -DTALgglsb2NhbGhvc3QwCwYDVR0PBAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMB -MB0GA1UdDgQWBBTD07Fp8N4xue7MMVrlI77W8Hpi1zANBgkqhkiG9w0BAQsFAAOC -AQEAjl4O2sDgEoe26OaxA8ICwemoaQh/uobVSwWejtNgrId8V8g8HQFNt3M3qbPk -mi8ymwmbs9NuiDK8DpWrIiXIT/22DUFJ/mfNlRB5FBK0ACwWfLs+lDrqD+Xdz/FP -ZT07xFgByrSXkBz3Av1Zrm8PzjLAuA7Ar/XYnkdwwmlNujFx+W92gSkGUduPE51Y -qCO0pFoG6TVhMv6kIQqqefblDv32yiMB+Yki/xYiR/6kRfOim8oXbwsZZvI1/QI6 -76Pyw57+2ZO9hREZRRuhtz0Ku0BFOeFI2/K2D+RDGQZ8mEfO6z/OQ3D1lB+apYDa -1uhrG0/cbVccEQDqfjiliHJuEQ== +MIIDETCCAfmgAwIBAgIUJ5AHy/8xFT6iFC5yf8RN+Zh9u8EwDQYJKoZIhvcNAQEL +BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MCAXDTI1MDMwNjA0MjYxMloYDzIyOTgx +MjE5MDQyNjEyWjAUMRIwEAYDVQQDDAlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEB 
+AQUAA4IBDwAwggEKAoIBAQC4MdJvJ9PDZR8qrKAEPH5aDNaj5hNjMbe4EkCC5gCy +6gWQd13nZ3IIELwIlXmZE0Rll0KRtiWAPQ53C1NGw3ipXHOFRd3nnw9pCf2pz+Cf +uXH+2YaLPP1rkRaFFKHUpbhjTWkazTPOFhgInMcI6vNEhdmIDMbj68Zp8i+ufac+ +OuqamuJXcuo2wKBAtlKnv1DhWyBjJ8f/aZxmXG2iVT5kuC1q+h+WxEn2fUxUKSj0 +1+EYlgAuIbv2E4OtydhHoFMA8PFWddd6Lhw8lUyP0DooBMdG7Rfa3vJ4+tFQHB0h +v79TtC97L/wJIV31T4r+HCNO8Jzg2Rx9F3vDCsULFB4dAgMBAAGjWTBXMBQGA1Ud +EQQNMAuCCWxvY2FsaG9zdDALBgNVHQ8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUH +AwEwHQYDVR0OBBYEFIX9dStJ5+p7TjWc3hfuwRGLPx4sMA0GCSqGSIb3DQEBCwUA +A4IBAQAKU7Alei1o3qVFouuoiJTENS5Af9878mNwyh7fhY++FeQ4UUvKeHxDrpPq +sG00Lep78OPwr4ZWKtPCMAhgbmkfS464qqaUGFdrdylBcYiKSgF0zsaXvVrxYEUG +vBfk8KGlKPgLmvMlBFZbOFIJHPPjSi2yzMzYaq7ujLQOiErs0siafy6piQXhpsZQ +bCI91bJbIXFlTjTeoYXT3wBz+2/NgbNLCo4YVfb4Ua8VVuSb+G+zUxzNnaPocQyA +QvmKwxnOUROow1CuSVge1BtvMDMkHd45NCgeYOFiem5S/0g5/N+qNsRHl0gWZOXr +a4NZtQ/cM9MUV9leNI+2baNTLdxh -----END CERTIFICATE-----