From c09bf736990f3ca54be896b66b47f9822443e8f3 Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Fri, 24 Jun 2022 23:33:38 -0700 Subject: [PATCH 1/8] port: recordUrlFilter --- playwright/_impl/_browser.py | 13 ++++++++- playwright/_impl/_browser_type.py | 3 +- playwright/async_api/_generated.py | 15 ++++++++-- playwright/sync_api/_generated.py | 15 ++++++++-- scripts/expected_api_mismatch.txt | 3 -- tests/async/test_har.py | 46 ++++++++++++++++++++++++++++++ tests/sync/test_har.py | 39 +++++++++++++++++++++++++ 7 files changed, 123 insertions(+), 11 deletions(-) diff --git a/playwright/_impl/_browser.py b/playwright/_impl/_browser.py index dc04e8cf7..895ada6cc 100644 --- a/playwright/_impl/_browser.py +++ b/playwright/_impl/_browser.py @@ -16,7 +16,7 @@ import json from pathlib import Path from types import SimpleNamespace -from typing import TYPE_CHECKING, Any, Dict, List, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Pattern, Union, cast from playwright._impl._api_structures import ( Geolocation, @@ -40,6 +40,7 @@ from playwright._impl._local_utils import LocalUtils from playwright._impl._network import serialize_headers from playwright._impl._page import Page +from playwright._impl._str_utils import escape_regex_flags if TYPE_CHECKING: # pragma: no cover from playwright._impl._browser_type import BrowserType @@ -116,6 +117,7 @@ async def new_context( baseURL: str = None, strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, + recordHarUrlFilter: Union[Pattern, str] = None, ) -> BrowserContext: params = locals_to_params(locals()) await normalize_context_params(self._connection._is_sync, params) @@ -160,6 +162,7 @@ async def new_page( baseURL: str = None, strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, + recordHarUrlFilter: Union[Pattern, str] = None, ) -> Page: params = locals_to_params(locals()) context = await self.new_context(**params) @@ -220,6 +223,14 @@ async def normalize_context_params(is_sync: bool, params: Dict) -> None: if "recordHarOmitContent" in params: params["recordHar"]["omitContent"] = params["recordHarOmitContent"] del params["recordHarOmitContent"] + if "recordHarUrlFilter" in params: + opt = params["recordHarUrlFilter"] + if isinstance(opt, str): + params["recordHar"]["urlGlob"] = opt + if isinstance(opt, Pattern): + params["recordHar"]["urlRegexSource"] = opt.pattern + params["recordHar"]["urlRegexFlags"] = escape_regex_flags(opt) + del params["recordHarUrlFilter"] del params["recordHarPath"] if "recordVideoDir" in params: params["recordVideo"] = {"dir": str(params["recordVideoDir"])} diff --git a/playwright/_impl/_browser_type.py b/playwright/_impl/_browser_type.py index 500e8b138..2d9e1adb2 100644 --- a/playwright/_impl/_browser_type.py +++ b/playwright/_impl/_browser_type.py @@ -15,7 +15,7 @@ import asyncio import pathlib from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Optional, Union, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Pattern, Union, cast from playwright._impl._api_structures import ( Geolocation, @@ -139,6 +139,7 @@ async def launch_persistent_context( baseURL: str = None, strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, + recordHarUrlFilter: Union[Pattern, str] = None, ) -> BrowserContext: userDataDir = str(Path(userDataDir)) params = locals_to_params(locals()) diff --git a/playwright/async_api/_generated.py b/playwright/async_api/_generated.py index a47af60a0..656d4a318 100644 --- a/playwright/async_api/_generated.py +++ b/playwright/async_api/_generated.py @@ -10647,7 +10647,8 @@ async def new_context( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None ) -> "BrowserContext": """Browser.new_context @@ -10756,6 +10757,7 @@ async def new_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] Returns ------- @@ -10795,6 +10797,7 @@ async def new_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, ) ) @@ -10831,7 +10834,8 @@ async def new_page( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None ) -> "Page": """Browser.new_page @@ -10935,6 +10939,7 @@ async def new_page( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] Returns ------- @@ -10974,6 +10979,7 @@ async def new_page( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, ) ) @@ -11269,7 +11275,8 @@ async def launch_persistent_context( record_video_size: ViewportSize = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None ) -> "BrowserContext": """BrowserType.launch_persistent_context @@ -11413,6 +11420,7 @@ async def launch_persistent_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] Returns ------- @@ -11466,6 +11474,7 @@ async def launch_persistent_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, ) ) diff --git a/playwright/sync_api/_generated.py b/playwright/sync_api/_generated.py index 7882d2282..bf00af8b4 100644 --- a/playwright/sync_api/_generated.py +++ b/playwright/sync_api/_generated.py @@ -10669,7 +10669,8 @@ def new_context( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None ) -> "BrowserContext": """Browser.new_context @@ -10778,6 +10779,7 @@ def new_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] Returns ------- @@ -10818,6 +10820,7 @@ def new_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, ) ) ) @@ -10855,7 +10858,8 @@ def new_page( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None ) -> "Page": """Browser.new_page @@ -10959,6 +10963,7 @@ def new_page( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] Returns ------- @@ -10999,6 +11004,7 @@ def new_page( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, ) ) ) @@ -11299,7 +11305,8 @@ def launch_persistent_context( record_video_size: ViewportSize = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None ) -> "BrowserContext": """BrowserType.launch_persistent_context @@ -11443,6 +11450,7 @@ def launch_persistent_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] Returns ------- @@ -11497,6 +11505,7 @@ def launch_persistent_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, ) ) ) diff --git a/scripts/expected_api_mismatch.txt b/scripts/expected_api_mismatch.txt index e362d28d4..5fad86551 100644 --- a/scripts/expected_api_mismatch.txt +++ b/scripts/expected_api_mismatch.txt @@ -20,9 +20,6 @@ Method not implemented: Error.message Method not implemented: PlaywrightAssertions.expect # Pending 1.23 ports -Parameter not implemented: BrowserType.launch_persistent_context(record_har_url_filter=) Method not implemented: BrowserContext.route_from_har Method not implemented: Route.fallback -Parameter not implemented: Browser.new_page(record_har_url_filter=) Method not implemented: Page.route_from_har -Parameter not implemented: Browser.new_context(record_har_url_filter=) diff --git a/tests/async/test_har.py b/tests/async/test_har.py index 6cb6f2472..c5b65fe05 100644 --- a/tests/async/test_har.py +++ b/tests/async/test_har.py @@ -14,6 +14,10 @@ import json import os +import re + +from playwright.async_api import Browser +from tests.server import Server async def test_should_work(browser, server, tmpdir): @@ -72,3 +76,45 @@ async def test_should_include_content(browser, server, tmpdir): content1 = log["entries"][0]["response"]["content"] assert content1["mimeType"] == "text/html" assert "HAR Page" in content1["text"] + + +async def test_should_filter_by_glob( + browser: Browser, server: Server, tmpdir: str +) -> None: + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter="/*.css", + ignore_https_errors=True, + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("one-style.css") + + +async def test_should_filter_by_regexp( + browser: Browser, server: Server, tmpdir: str +) -> None: + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter=re.compile("HAR.X?HTML", re.I), + ignore_https_errors=True, + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("har.html") diff --git a/tests/sync/test_har.py b/tests/sync/test_har.py index 2561c7fd6..d44e79171 100644 --- a/tests/sync/test_har.py +++ b/tests/sync/test_har.py @@ -14,6 +14,7 @@ import json import os +import re from pathlib import Path from playwright.sync_api import Browser @@ -61,3 +62,41 @@ def test_should_include_content(browser: Browser, server: Server, tmpdir: Path) content1 = log["entries"][0]["response"]["content"] assert content1["mimeType"] == "text/html" assert "HAR Page" in content1["text"] + + +def test_should_filter_by_glob(browser: Browser, server: Server, tmpdir: str) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter="/*.css", + ignore_https_errors=True, + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("one-style.css") + + +def test_should_filter_by_regexp(browser: Browser, server: Server, tmpdir: str) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter=re.compile("HAR.X?HTML", re.I), + ignore_https_errors=True, + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("har.html") From 49ba98b4280b549f3da48134a4d4d3c0edfbc370 Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Sat, 25 Jun 2022 13:44:37 -0700 Subject: [PATCH 2/8] explicitly set charset to fix FF bots --- tests/async/test_har.py | 2 +- tests/server.py | 5 ++++- tests/sync/test_har.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/async/test_har.py b/tests/async/test_har.py index c5b65fe05..cb78ac3de 100644 --- a/tests/async/test_har.py +++ b/tests/async/test_har.py @@ -74,7 +74,7 @@ async def test_should_include_content(browser, server, tmpdir): log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert content1["mimeType"] == "text/html" + assert content1["mimeType"] == "text/html; charset=utf-8" assert "HAR Page" in content1["text"] diff --git a/tests/server.py b/tests/server.py index f63ea5ad8..18e549a65 100644 --- a/tests/server.py +++ b/tests/server.py @@ -134,7 +134,10 @@ def process(self) -> None: file_content = None try: file_content = (static_path / path[1:]).read_bytes() - request.setHeader(b"Content-Type", mimetypes.guess_type(path)[0]) + content_type = mimetypes.guess_type(path)[0] + if content_type and content_type.startswith("text/"): + content_type += "; charset=utf-8" + request.setHeader(b"Content-Type", content_type) request.setHeader(b"Cache-Control", "no-cache, no-store") if path in gzip_routes: request.setHeader("Content-Encoding", "gzip") diff --git a/tests/sync/test_har.py b/tests/sync/test_har.py index d44e79171..906fba703 100644 --- a/tests/sync/test_har.py +++ b/tests/sync/test_har.py @@ -60,7 +60,7 @@ def test_should_include_content(browser: Browser, server: Server, tmpdir: Path) log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert content1["mimeType"] == "text/html" + assert content1["mimeType"] == "text/html; charset=utf-8" assert "HAR Page" in content1["text"] From 62565e01556aa2de8c106256e78a765d0655b623 Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Sat, 25 Jun 2022 14:22:18 -0700 Subject: [PATCH 3/8] roll to next driver --- playwright/_impl/_browser.py | 6 +++++ playwright/_impl/_browser_type.py | 4 +++ playwright/_impl/_helper.py | 2 ++ playwright/async_api/_generated.py | 39 +++++++++++++++++++++++++++--- playwright/sync_api/_generated.py | 39 +++++++++++++++++++++++++++--- setup.py | 2 +- 6 files changed, 85 insertions(+), 7 deletions(-) diff --git a/playwright/_impl/_browser.py b/playwright/_impl/_browser.py index 895ada6cc..893905f4d 100644 --- a/playwright/_impl/_browser.py +++ b/playwright/_impl/_browser.py @@ -31,6 +31,8 @@ from playwright._impl._helper import ( ColorScheme, ForcedColors, + HarContentPolicy, + HarMode, ReducedMotion, ServiceWorkersPolicy, async_readfile, @@ -118,6 +120,8 @@ async def new_context( strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, recordHarUrlFilter: Union[Pattern, str] = None, + recordHarMode: HarMode = None, + recordHarContent: HarContentPolicy = None, ) -> BrowserContext: params = locals_to_params(locals()) await normalize_context_params(self._connection._is_sync, params) @@ -163,6 +167,8 @@ async def new_page( strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, recordHarUrlFilter: Union[Pattern, str] = None, + recordHarMode: HarMode = None, + recordHarContent: HarContentPolicy = None, ) -> Page: params = locals_to_params(locals()) context = await self.new_context(**params) diff --git a/playwright/_impl/_browser_type.py b/playwright/_impl/_browser_type.py index 2d9e1adb2..761fb57c9 100644 --- a/playwright/_impl/_browser_type.py +++ b/playwright/_impl/_browser_type.py @@ -36,6 +36,8 @@ ColorScheme, Env, ForcedColors, + HarContentPolicy, + HarMode, ReducedMotion, ServiceWorkersPolicy, locals_to_params, @@ -140,6 +142,8 @@ async def launch_persistent_context( strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, recordHarUrlFilter: Union[Pattern, str] = None, + recordHarMode: HarMode = None, + recordHarContent: HarContentPolicy = None, ) -> BrowserContext: userDataDir = str(Path(userDataDir)) params = locals_to_params(locals()) diff --git a/playwright/_impl/_helper.py b/playwright/_impl/_helper.py index 51f15f437..71d55b917 100644 --- a/playwright/_impl/_helper.py +++ b/playwright/_impl/_helper.py @@ -65,6 +65,8 @@ KeyboardModifier = Literal["Alt", "Control", "Meta", "Shift"] MouseButton = Literal["left", "middle", "right"] ServiceWorkersPolicy = Literal["allow", "block"] +HarMode = Literal["full", "minimal"] +HarContentPolicy = Literal["attach", "embed", "omit"] class ErrorPayload(TypedDict, total=False): diff --git a/playwright/async_api/_generated.py b/playwright/async_api/_generated.py index 656d4a318..1eca729b8 100644 --- a/playwright/async_api/_generated.py +++ b/playwright/async_api/_generated.py @@ -10648,7 +10648,9 @@ async def new_context( base_url: str = None, strict_selectors: bool = None, service_workers: Literal["allow", "block"] = None, - record_har_url_filter: typing.Union[str, typing.Pattern] = None + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """Browser.new_context @@ -10758,6 +10760,13 @@ async def new_context( - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persistet as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -10798,6 +10807,8 @@ async def new_context( strictSelectors=strict_selectors, serviceWorkers=service_workers, recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) @@ -10835,7 +10846,9 @@ async def new_page( base_url: str = None, strict_selectors: bool = None, service_workers: Literal["allow", "block"] = None, - record_har_url_filter: typing.Union[str, typing.Pattern] = None + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "Page": """Browser.new_page @@ -10940,6 +10953,13 @@ async def new_page( - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persistet as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -10980,6 +11000,8 @@ async def new_page( strictSelectors=strict_selectors, serviceWorkers=service_workers, recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) @@ -11276,7 +11298,9 @@ async def launch_persistent_context( base_url: str = None, strict_selectors: bool = None, service_workers: Literal["allow", "block"] = None, - record_har_url_filter: typing.Union[str, typing.Pattern] = None + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """BrowserType.launch_persistent_context @@ -11421,6 +11445,13 @@ async def launch_persistent_context( - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persistet as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -11475,6 +11506,8 @@ async def launch_persistent_context( strictSelectors=strict_selectors, serviceWorkers=service_workers, recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) diff --git a/playwright/sync_api/_generated.py b/playwright/sync_api/_generated.py index bf00af8b4..88c3aca7d 100644 --- a/playwright/sync_api/_generated.py +++ b/playwright/sync_api/_generated.py @@ -10670,7 +10670,9 @@ def new_context( base_url: str = None, strict_selectors: bool = None, service_workers: Literal["allow", "block"] = None, - record_har_url_filter: typing.Union[str, typing.Pattern] = None + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """Browser.new_context @@ -10780,6 +10782,13 @@ def new_context( - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persistet as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -10821,6 +10830,8 @@ def new_context( strictSelectors=strict_selectors, serviceWorkers=service_workers, recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) ) @@ -10859,7 +10870,9 @@ def new_page( base_url: str = None, strict_selectors: bool = None, service_workers: Literal["allow", "block"] = None, - record_har_url_filter: typing.Union[str, typing.Pattern] = None + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "Page": """Browser.new_page @@ -10964,6 +10977,13 @@ def new_page( - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persistet as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -11005,6 +11025,8 @@ def new_page( strictSelectors=strict_selectors, serviceWorkers=service_workers, recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) ) @@ -11306,7 +11328,9 @@ def launch_persistent_context( base_url: str = None, strict_selectors: bool = None, service_workers: Literal["allow", "block"] = None, - record_har_url_filter: typing.Union[str, typing.Pattern] = None + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """BrowserType.launch_persistent_context @@ -11451,6 +11475,13 @@ def launch_persistent_context( - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persistet as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -11506,6 +11537,8 @@ def launch_persistent_context( strictSelectors=strict_selectors, serviceWorkers=service_workers, recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) ) diff --git a/setup.py b/setup.py index 2023d03be..c3376f354 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ InWheel = None from wheel.bdist_wheel import bdist_wheel as BDistWheelCommand -driver_version = "1.23.0-beta-1656026605000" +driver_version = "1.23.0-beta-1656093125000" def extractall(zip: zipfile.ZipFile, path: str) -> None: From 9b2ec121cb4548a166ffa47678f0554acc95df0e Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Sat, 25 Jun 2022 15:23:44 -0700 Subject: [PATCH 4/8] add other recordHar options --- playwright/_impl/_browser.py | 19 +++++- tests/async/test_har.py | 109 ++++++++++++++++++++++++++++++++- tests/server.py | 1 + tests/sync/test_har.py | 113 ++++++++++++++++++++++++++++++++++- 4 files changed, 237 insertions(+), 5 deletions(-) diff --git a/playwright/_impl/_browser.py b/playwright/_impl/_browser.py index 893905f4d..4f5a72878 100644 --- a/playwright/_impl/_browser.py +++ b/playwright/_impl/_browser.py @@ -226,9 +226,6 @@ async def normalize_context_params(is_sync: bool, params: Dict) -> None: if "recordHarPath" in params: recordHar: Dict[str, Any] = {"path": str(params["recordHarPath"])} params["recordHar"] = recordHar - if "recordHarOmitContent" in params: - params["recordHar"]["omitContent"] = params["recordHarOmitContent"] - del params["recordHarOmitContent"] if "recordHarUrlFilter" in params: opt = params["recordHarUrlFilter"] if isinstance(opt, str): @@ -237,6 +234,22 @@ async def normalize_context_params(is_sync: bool, params: Dict) -> None: params["recordHar"]["urlRegexSource"] = opt.pattern params["recordHar"]["urlRegexFlags"] = escape_regex_flags(opt) del params["recordHarUrlFilter"] + if "recordHarMode" in params: + params["recordHar"]["mode"] = params["recordHarMode"] + del params["recordHarMode"] + + new_content_api = None + old_content_api = None + if "recordHarContent" in params: + new_content_api = params["recordHarContent"] + del params["recordHarContent"] + if "recordHarOmitContent" in params: + old_content_api = params["recordHarOmitContent"] + del params["recordHarOmitContent"] + content = new_content_api or ("omit" if old_content_api else None) + if content: + params["recordHar"]["content"] = content + del params["recordHarPath"] if "recordVideoDir" in params: params["recordVideo"] = {"dir": str(params["recordVideoDir"])} diff --git a/tests/async/test_har.py b/tests/async/test_har.py index cb78ac3de..00d02d32d 100644 --- a/tests/async/test_har.py +++ b/tests/async/test_har.py @@ -15,6 +15,7 @@ import json import os import re +import zipfile from playwright.async_api import Browser from tests.server import Server @@ -32,6 +33,24 @@ async def test_should_work(browser, server, tmpdir): async def test_should_omit_content(browser, server, tmpdir): + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + record_har_path=path, + record_har_content="omit", + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + content1 = log["entries"][0]["response"]["content"] + assert "text" not in content1 + assert "encoding" not in content1 + + +async def test_should_omit_content_legacy(browser, server, tmpdir): path = os.path.join(tmpdir, "log.har") context = await browser.new_context( record_har_path=path, record_har_omit_content=True @@ -44,10 +63,67 @@ async def test_should_omit_content(browser, server, tmpdir): assert "log" in data log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert "text" in content1 + assert "text" not in content1 assert "encoding" not in content1 +async def test_should_attach_content(browser, server, tmpdir, is_firefox): + path = os.path.join(tmpdir, "log.har.zip") + context = await browser.new_context( + record_har_path=path, + record_har_content="attach", + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await page.evaluate("() => fetch('/pptr.png').then(r => r.arrayBuffer())") + await context.close() + with zipfile.ZipFile(path) as z: + with z.open("har.har") as har: + entries = json.load(har)["log"]["entries"] + + assert "encoding" not in entries[0]["response"]["content"] + assert ( + entries[0]["response"]["content"]["mimeType"] + == "text/html; charset=utf-8" + ) + assert ( + "75841480e2606c03389077304342fac2c58ccb1b" + in entries[0]["response"]["content"]["_file"] + ) + assert entries[0]["response"]["content"]["size"] >= 96 + assert entries[0]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[1]["response"]["content"] + assert ( + entries[1]["response"]["content"]["mimeType"] + == "text/css; charset=utf-8" + ) + assert ( + "79f739d7bc88e80f55b9891a22bf13a2b4e18adb" + in entries[1]["response"]["content"]["_file"] + ) + assert entries[1]["response"]["content"]["size"] >= 37 + assert entries[1]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[2]["response"]["content"] + assert entries[2]["response"]["content"]["mimeType"] == "image/png" + assert ( + "a4c3a18f0bb83f5d9fe7ce561e065c36205762fa" + in entries[2]["response"]["content"]["_file"] + ) + assert entries[2]["response"]["content"]["size"] >= 6000 + assert entries[2]["response"]["content"]["compression"] == 0 + + with z.open("75841480e2606c03389077304342fac2c58ccb1b.html") as f: + assert b"HAR Page" in f.read() + + with z.open("79f739d7bc88e80f55b9891a22bf13a2b4e18adb.css") as f: + assert b"pink" in f.read() + + with z.open("a4c3a18f0bb83f5d9fe7ce561e065c36205762fa.png") as f: + assert len(f.read()) == entries[2]["response"]["content"]["size"] + + async def test_should_not_omit_content(browser, server, tmpdir): path = os.path.join(tmpdir, "log.har") context = await browser.new_context( @@ -78,6 +154,37 @@ async def test_should_include_content(browser, server, tmpdir): assert "HAR Page" in content1["text"] +async def test_should_default_to_full_mode(browser, server, tmpdir): + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + record_har_path=path, + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] >= 0 + + +async def test_should_support_minimal_mode(browser, server, tmpdir): + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + record_har_path=path, + record_har_mode="minimal", + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] == -1 + + async def test_should_filter_by_glob( browser: Browser, server: Server, tmpdir: str ) -> None: diff --git a/tests/server.py b/tests/server.py index 18e549a65..a5cd42019 100644 --- a/tests/server.py +++ b/tests/server.py @@ -139,6 +139,7 @@ def process(self) -> None: content_type += "; charset=utf-8" request.setHeader(b"Content-Type", content_type) request.setHeader(b"Cache-Control", "no-cache, no-store") + request.setHeader(b"Content-Length", str(len(file_content))) if path in gzip_routes: request.setHeader("Content-Encoding", "gzip") request.write(gzip.compress(file_content)) diff --git a/tests/sync/test_har.py b/tests/sync/test_har.py index 906fba703..479c97e0a 100644 --- a/tests/sync/test_har.py +++ b/tests/sync/test_har.py @@ -15,6 +15,7 @@ import json import os import re +import zipfile from pathlib import Path from playwright.sync_api import Browser @@ -33,6 +34,24 @@ def test_should_work(browser: Browser, server: Server, tmpdir: Path) -> None: def test_should_omit_content(browser: Browser, server: Server, tmpdir: Path) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context(record_har_path=path, record_har_content="omit") + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + + content1 = log["entries"][0]["response"]["content"] + assert "text" not in content1 + assert "encoding" not in content1 + + +def test_should_omit_content_legacy( + browser: Browser, server: Server, tmpdir: Path +) -> None: path = os.path.join(tmpdir, "log.har") context = browser.new_context(record_har_path=path, record_har_omit_content=True) page = context.new_page() @@ -44,10 +63,67 @@ def test_should_omit_content(browser: Browser, server: Server, tmpdir: Path) -> log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert "text" in content1 + assert "text" not in content1 assert "encoding" not in content1 +def test_should_attach_content(browser: Browser, server: Server, tmpdir: Path) -> None: + path = os.path.join(tmpdir, "log.har.zip") + context = browser.new_context( + record_har_path=path, + record_har_content="attach", + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + page.evaluate("() => fetch('/pptr.png').then(r => r.arrayBuffer())") + context.close() + with zipfile.ZipFile(path) as z: + with z.open("har.har") as har: + entries = json.load(har)["log"]["entries"] + + assert "encoding" not in entries[0]["response"]["content"] + assert ( + entries[0]["response"]["content"]["mimeType"] + == "text/html; charset=utf-8" + ) + assert ( + "75841480e2606c03389077304342fac2c58ccb1b" + in entries[0]["response"]["content"]["_file"] + ) + assert entries[0]["response"]["content"]["size"] >= 96 + assert entries[0]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[1]["response"]["content"] + assert ( + entries[1]["response"]["content"]["mimeType"] + == "text/css; charset=utf-8" + ) + assert ( + "79f739d7bc88e80f55b9891a22bf13a2b4e18adb" + in entries[1]["response"]["content"]["_file"] + ) + assert entries[1]["response"]["content"]["size"] >= 37 + assert entries[1]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[2]["response"]["content"] + assert entries[2]["response"]["content"]["mimeType"] == "image/png" + assert ( + "a4c3a18f0bb83f5d9fe7ce561e065c36205762fa" + in entries[2]["response"]["content"]["_file"] + ) + assert entries[2]["response"]["content"]["size"] >= 6000 + assert entries[2]["response"]["content"]["compression"] == 0 + + with z.open("75841480e2606c03389077304342fac2c58ccb1b.html") as f: + assert b"HAR Page" in f.read() + + with z.open("79f739d7bc88e80f55b9891a22bf13a2b4e18adb.css") as f: + assert b"pink" in f.read() + + with z.open("a4c3a18f0bb83f5d9fe7ce561e065c36205762fa.png") as f: + assert len(f.read()) == entries[2]["response"]["content"]["size"] + + def test_should_include_content(browser: Browser, server: Server, tmpdir: Path) -> None: path = os.path.join(tmpdir, "log.har") context = browser.new_context(record_har_path=path) @@ -64,6 +140,41 @@ def test_should_include_content(browser: Browser, server: Server, tmpdir: Path) assert "HAR Page" in content1["text"] +def test_should_default_to_full_mode( + browser: Browser, server: Server, tmpdir: Path +) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + record_har_path=path, + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] >= 0 + + +def test_should_support_minimal_mode( + browser: Browser, server: Server, tmpdir: Path +) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + record_har_path=path, + record_har_mode="minimal", + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] == -1 + + def test_should_filter_by_glob(browser: Browser, server: Server, tmpdir: str) -> None: path = os.path.join(tmpdir, "log.har") context = browser.new_context( From 1490bca604c1e755029cf7d210b900b41f766899 Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Mon, 27 Jun 2022 11:40:58 -0700 Subject: [PATCH 5/8] rebaseline with charset --- tests/async/test_request_intercept.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/async/test_request_intercept.py b/tests/async/test_request_intercept.py index 1e5fca4fb..11def1744 100644 --- a/tests/async/test_request_intercept.py +++ b/tests/async/test_request_intercept.py @@ -131,13 +131,13 @@ async def test_should_give_access_to_the_intercepted_response( assert response.status_text == "OK" assert response.ok is True assert response.url.endswith("/title.html") is True - assert response.headers["content-type"] == "text/html" + assert response.headers["content-type"] == "text/html; charset=utf-8" assert list( filter( lambda header: header["name"].lower() == "content-type", response.headers_array, ) - ) == [{"name": "Content-Type", "value": "text/html"}] + ) == [{"name": "Content-Type", "value": "text/html; charset=utf-8"}] await asyncio.gather( route.fulfill(response=response), From 75c9a651744bb256bf9db62ce8e22d6e54181bed Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Mon, 27 Jun 2022 11:47:34 -0700 Subject: [PATCH 6/8] match test to upstream --- tests/async/test_request_intercept.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/async/test_request_intercept.py b/tests/async/test_request_intercept.py index 11def1744..39ccf3d3f 100644 --- a/tests/async/test_request_intercept.py +++ b/tests/async/test_request_intercept.py @@ -43,7 +43,9 @@ async def handle(route: Route): async def test_should_fulfill_response_with_empty_body(page: Page, server: Server): async def handle(route: Route): response = await page.request.fetch(route.request) - await route.fulfill(response=response, status=201, body="") + await route.fulfill( + response=response, status=201, body="", headers={"content-length": "0"} + ) await page.route("**/*", handle) response = await page.goto(server.PREFIX + "/title.html") From 7de7efc2d62b9731c6023df54e535be9d38c43d6 Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Mon, 27 Jun 2022 11:56:57 -0700 Subject: [PATCH 7/8] fix sync test --- tests/sync/test_request_intercept.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/sync/test_request_intercept.py b/tests/sync/test_request_intercept.py index dc66000e7..dc714e832 100644 --- a/tests/sync/test_request_intercept.py +++ b/tests/sync/test_request_intercept.py @@ -43,7 +43,9 @@ def handle(route: Route) -> None: def test_should_fulfill_response_with_empty_body(page: Page, server: Server) -> None: def handle(route: Route) -> None: response = page.request.fetch(route.request) - route.fulfill(response=response, status=201, body="") + route.fulfill( + response=response, status=201, body="", headers={"content-length": "0"} + ) page.route("**/*", handle) response = page.goto(server.PREFIX + "/title.html") From 16aa2e23efdded016ca27b8d57d11e607fd4e21d Mon Sep 17 00:00:00 2001 From: "Ross A. Wollman" Date: Mon, 27 Jun 2022 12:22:06 -0700 Subject: [PATCH 8/8] fix writing incorrect content-length header --- tests/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/server.py b/tests/server.py index a5cd42019..75a0631d2 100644 --- a/tests/server.py +++ b/tests/server.py @@ -139,11 +139,11 @@ def process(self) -> None: content_type += "; charset=utf-8" request.setHeader(b"Content-Type", content_type) request.setHeader(b"Cache-Control", "no-cache, no-store") - request.setHeader(b"Content-Length", str(len(file_content))) if path in gzip_routes: request.setHeader("Content-Encoding", "gzip") request.write(gzip.compress(file_content)) else: + request.setHeader(b"Content-Length", str(len(file_content))) request.write(file_content) self.setResponseCode(HTTPStatus.OK) except (FileNotFoundError, IsADirectoryError, PermissionError):