Skip to content

Commit bb2a3d7

Browse files
committed
Remove the html5lib deprecated feature flag.
1 parent dc00479 commit bb2a3d7

File tree

14 files changed

+21
-151
lines changed

14 files changed

+21
-151
lines changed

news/10825.removal.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Remove the ``html5lib`` deprecated feature flag.

src/pip/_internal/cli/cmdoptions.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1013,7 +1013,6 @@ def check_list_path_option(options: Values) -> None:
10131013
default=[],
10141014
choices=[
10151015
"legacy-resolver",
1016-
"html5lib",
10171016
],
10181017
help=("Enable deprecated functionality, that will be removed in the future."),
10191018
)

src/pip/_internal/cli/req_command.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -499,5 +499,4 @@ def _build_package_finder(
499499
link_collector=link_collector,
500500
selection_prefs=selection_prefs,
501501
target_python=target_python,
502-
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
503502
)

src/pip/_internal/commands/index.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,6 @@ def _build_package_finder(
9797
link_collector=link_collector,
9898
selection_prefs=selection_prefs,
9999
target_python=target_python,
100-
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
101100
)
102101

103102
def get_available_package_versions(self, options: Values, args: List[Any]) -> None:

src/pip/_internal/commands/list.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,6 @@ def _build_package_finder(
149149
return PackageFinder.create(
150150
link_collector=link_collector,
151151
selection_prefs=selection_prefs,
152-
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
153152
)
154153

155154
def run(self, options: Values, args: List[str]) -> int:

src/pip/_internal/index/collector.py

Lines changed: 8 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
Union,
3030
)
3131

32-
from pip._vendor import html5lib, requests
32+
from pip._vendor import requests
3333
from pip._vendor.requests import Response
3434
from pip._vendor.requests.exceptions import RetryError, SSLError
3535

@@ -191,27 +191,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
191191
return None
192192

193193

194-
def _determine_base_url(document: HTMLElement, page_url: str) -> str:
195-
"""Determine the HTML document's base URL.
196-
197-
This looks for a ``<base>`` tag in the HTML document. If present, its href
198-
attribute denotes the base URL of anchor tags in the document. If there is
199-
no such tag (or if it does not have a valid href attribute), the HTML
200-
file's URL is used as the base URL.
201-
202-
:param document: An HTML document representation. The current
203-
implementation expects the result of ``html5lib.parse()``.
204-
:param page_url: The URL of the HTML document.
205-
206-
TODO: Remove when `html5lib` is dropped.
207-
"""
208-
for base in document.findall(".//base"):
209-
href = base.get("href")
210-
if href is not None:
211-
return href
212-
return page_url
213-
214-
215194
def _clean_url_path_part(part: str) -> str:
216195
"""
217196
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -313,9 +292,7 @@ def __hash__(self) -> int:
313292

314293

315294
class ParseLinks(Protocol):
316-
def __call__(
317-
self, page: "IndexContent", use_deprecated_html5lib: bool
318-
) -> Iterable[Link]:
295+
def __call__(self, page: "IndexContent") -> Iterable[Link]:
319296
...
320297

321298

@@ -327,49 +304,20 @@ def with_cached_index_content(fn: ParseLinks) -> ParseLinks:
327304
"""
328305

329306
@functools.lru_cache(maxsize=None)
330-
def wrapper(
331-
cacheable_page: CacheablePageContent, use_deprecated_html5lib: bool
332-
) -> List[Link]:
333-
return list(fn(cacheable_page.page, use_deprecated_html5lib))
307+
def wrapper(cacheable_page: CacheablePageContent) -> List[Link]:
308+
return list(fn(cacheable_page.page))
334309

335310
@functools.wraps(fn)
336-
def wrapper_wrapper(
337-
page: "IndexContent", use_deprecated_html5lib: bool
338-
) -> List[Link]:
311+
def wrapper_wrapper(page: "IndexContent") -> List[Link]:
339312
if page.cache_link_parsing:
340-
return wrapper(CacheablePageContent(page), use_deprecated_html5lib)
341-
return list(fn(page, use_deprecated_html5lib))
313+
return wrapper(CacheablePageContent(page))
314+
return list(fn(page))
342315

343316
return wrapper_wrapper
344317

345318

346-
def _parse_links_html5lib(page: "IndexContent") -> Iterable[Link]:
347-
"""
348-
Parse an HTML document, and yield its anchor elements as Link objects.
349-
350-
TODO: Remove when `html5lib` is dropped.
351-
"""
352-
document = html5lib.parse(
353-
page.content,
354-
transport_encoding=page.encoding,
355-
namespaceHTMLElements=False,
356-
)
357-
358-
url = page.url
359-
base_url = _determine_base_url(document, url)
360-
for anchor in document.findall(".//a"):
361-
link = _create_link_from_element(
362-
anchor.attrib,
363-
page_url=url,
364-
base_url=base_url,
365-
)
366-
if link is None:
367-
continue
368-
yield link
369-
370-
371319
@with_cached_index_content
372-
def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable[Link]:
320+
def parse_links(page: "IndexContent") -> Iterable[Link]:
373321
"""
374322
Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
375323
"""
@@ -398,10 +346,6 @@ def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable
398346
hashes=file.get("hashes", {}),
399347
)
400348

401-
if use_deprecated_html5lib:
402-
yield from _parse_links_html5lib(page)
403-
return
404-
405349
parser = HTMLLinkParser(page.url)
406350
encoding = page.encoding or "utf-8"
407351
parser.feed(page.content.decode(encoding))

src/pip/_internal/index/package_finder.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -598,7 +598,6 @@ def __init__(
598598
link_collector: LinkCollector,
599599
target_python: TargetPython,
600600
allow_yanked: bool,
601-
use_deprecated_html5lib: bool,
602601
format_control: Optional[FormatControl] = None,
603602
candidate_prefs: Optional[CandidatePreferences] = None,
604603
ignore_requires_python: Optional[bool] = None,
@@ -623,7 +622,6 @@ def __init__(
623622
self._ignore_requires_python = ignore_requires_python
624623
self._link_collector = link_collector
625624
self._target_python = target_python
626-
self._use_deprecated_html5lib = use_deprecated_html5lib
627625

628626
self.format_control = format_control
629627

@@ -640,8 +638,6 @@ def create(
640638
link_collector: LinkCollector,
641639
selection_prefs: SelectionPreferences,
642640
target_python: Optional[TargetPython] = None,
643-
*,
644-
use_deprecated_html5lib: bool,
645641
) -> "PackageFinder":
646642
"""Create a PackageFinder.
647643
@@ -666,7 +662,6 @@ def create(
666662
allow_yanked=selection_prefs.allow_yanked,
667663
format_control=selection_prefs.format_control,
668664
ignore_requires_python=selection_prefs.ignore_requires_python,
669-
use_deprecated_html5lib=use_deprecated_html5lib,
670665
)
671666

672667
@property
@@ -796,7 +791,7 @@ def process_project_url(
796791
if index_response is None:
797792
return []
798793

799-
page_links = list(parse_links(index_response, self._use_deprecated_html5lib))
794+
page_links = list(parse_links(index_response))
800795

801796
with indent_log():
802797
package_links = self.evaluate_links(

src/pip/_internal/self_outdated_check.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,6 @@ def _get_current_remote_pip_version(
173173
finder = PackageFinder.create(
174174
link_collector=link_collector,
175175
selection_prefs=selection_prefs,
176-
use_deprecated_html5lib=("html5lib" in options.deprecated_features_enabled),
177176
)
178177
best_candidate = finder.find_best_candidate("pip").best_candidate
179178
if best_candidate is None:

tests/functional/test_build_env.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,6 @@ def run_with_build_env(
4949
finder = PackageFinder.create(
5050
link_collector=link_collector,
5151
selection_prefs=selection_prefs,
52-
use_deprecated_html5lib=False,
5352
)
5453
5554
with global_tempdir_manager():

tests/lib/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,6 @@ def make_test_finder(
115115
allow_all_prereleases: bool = False,
116116
session: Optional[PipSession] = None,
117117
target_python: Optional[TargetPython] = None,
118-
use_deprecated_html5lib: bool = False,
119118
) -> PackageFinder:
120119
"""
121120
Create a PackageFinder for testing purposes.
@@ -134,7 +133,6 @@ def make_test_finder(
134133
link_collector=link_collector,
135134
selection_prefs=selection_prefs,
136135
target_python=target_python,
137-
use_deprecated_html5lib=use_deprecated_html5lib,
138136
)
139137

140138

tests/unit/resolution_resolvelib/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def finder(data: TestData) -> Iterator[PackageFinder]:
2626
scope = SearchScope([str(data.packages)], [])
2727
collector = LinkCollector(session, scope)
2828
prefs = SelectionPreferences(allow_yanked=False)
29-
finder = PackageFinder.create(collector, prefs, use_deprecated_html5lib=False)
29+
finder = PackageFinder.create(collector, prefs)
3030
yield finder
3131

3232

tests/unit/test_collector.py

Lines changed: 6 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -10,15 +10,14 @@
1010
from unittest import mock
1111

1212
import pytest
13-
from pip._vendor import html5lib, requests
13+
from pip._vendor import requests
1414

1515
from pip._internal.exceptions import NetworkConnectionError
1616
from pip._internal.index.collector import (
1717
IndexContent,
1818
LinkCollector,
1919
_clean_link,
2020
_clean_url_path,
21-
_determine_base_url,
2221
_get_index_content,
2322
_get_simple_response,
2423
_make_index_content,
@@ -249,33 +248,6 @@ def test_get_simple_response_dont_log_clear_text_password(
249248
]
250249

251250

252-
@pytest.mark.parametrize(
253-
("html", "url", "expected"),
254-
[
255-
(b"<html></html>", "https://example.com/", "https://example.com/"),
256-
(
257-
b'<html><head><base href="https://foo.example.com/"></head></html>',
258-
"https://example.com/",
259-
"https://foo.example.com/",
260-
),
261-
(
262-
b"<html><head>"
263-
b'<base><base href="https://foo.example.com/">'
264-
b"</head></html>",
265-
"https://example.com/",
266-
"https://foo.example.com/",
267-
),
268-
],
269-
)
270-
def test_determine_base_url(html: bytes, url: str, expected: str) -> None:
271-
document = html5lib.parse(
272-
html,
273-
transport_encoding=None,
274-
namespaceHTMLElements=False,
275-
)
276-
assert _determine_base_url(document, url) == expected
277-
278-
279251
@pytest.mark.parametrize(
280252
("path", "expected"),
281253
[
@@ -451,7 +423,7 @@ def _test_parse_links_data_attribute(
451423
# the page content isn't cached.
452424
url=f"https://example.com/simple-{uuid.uuid4()}/",
453425
)
454-
links = list(parse_links(page, use_deprecated_html5lib=False))
426+
links = list(parse_links(page))
455427
(link,) = links
456428
actual = getattr(link, attr)
457429
assert actual == expected
@@ -513,7 +485,7 @@ def test_parse_links_json() -> None:
513485
# the page content isn't cached.
514486
url=f"https://example.com/simple-{uuid.uuid4()}/",
515487
)
516-
links = list(parse_links(page, use_deprecated_html5lib=False))
488+
links = list(parse_links(page))
517489

518490
assert links == [
519491
Link(
@@ -597,33 +569,19 @@ def test_parse_links_caches_same_page_by_url() -> None:
597569
cache_link_parsing=False,
598570
)
599571

600-
parsed_links_1 = list(parse_links(page_1, use_deprecated_html5lib=False))
572+
parsed_links_1 = list(parse_links(page_1))
601573
assert len(parsed_links_1) == 1
602574
assert "pkg1" in parsed_links_1[0].url
603575

604-
parsed_links_2 = list(parse_links(page_2, use_deprecated_html5lib=False))
576+
parsed_links_2 = list(parse_links(page_2))
605577
assert parsed_links_2 == parsed_links_1
606578

607-
parsed_links_3 = list(parse_links(page_3, use_deprecated_html5lib=False))
579+
parsed_links_3 = list(parse_links(page_3))
608580
assert len(parsed_links_3) == 1
609581
assert parsed_links_3 != parsed_links_1
610582
assert "pkg2" in parsed_links_3[0].url
611583

612584

613-
def test_parse_link_handles_deprecated_usage_properly() -> None:
614-
html = b'<a href="/pkg1-1.0.tar.gz"></a><a href="/pkg1-2.0.tar.gz"></a>'
615-
url = "https://example.com/simple/"
616-
page = IndexContent(
617-
html, "text/html", encoding=None, url=url, cache_link_parsing=False
618-
)
619-
620-
parsed_links = list(parse_links(page, use_deprecated_html5lib=True))
621-
622-
assert len(parsed_links) == 2
623-
assert "pkg1-1.0" in parsed_links[0].url
624-
assert "pkg1-2.0" in parsed_links[1].url
625-
626-
627585
@mock.patch("pip._internal.index.collector.raise_for_status")
628586
def test_request_http_error(
629587
mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture

tests/unit/test_finder.py

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,7 @@ def test_incorrect_case_file_index(data: TestData) -> None:
8080

8181

8282
@pytest.mark.network
83-
@pytest.mark.parametrize("use_deprecated_html5lib", [False, True])
84-
def test_finder_detects_latest_already_satisfied_find_links(
85-
data: TestData, use_deprecated_html5lib: bool
86-
) -> None:
83+
def test_finder_detects_latest_already_satisfied_find_links(data: TestData) -> None:
8784
"""Test PackageFinder detects latest already satisfied using find-links"""
8885
req = install_req_from_line("simple", None)
8986
# the latest simple in local pkgs is 3.0
@@ -93,19 +90,14 @@ def test_finder_detects_latest_already_satisfied_find_links(
9390
version=parse_version(latest_version),
9491
)
9592
req.satisfied_by = satisfied_by
96-
finder = make_test_finder(
97-
find_links=[data.find_links], use_deprecated_html5lib=use_deprecated_html5lib
98-
)
93+
finder = make_test_finder(find_links=[data.find_links])
9994

10095
with pytest.raises(BestVersionAlreadyInstalled):
10196
finder.find_requirement(req, True)
10297

10398

10499
@pytest.mark.network
105-
@pytest.mark.parametrize("use_deprecated_html5lib", [False, True])
106-
def test_finder_detects_latest_already_satisfied_pypi_links(
107-
use_deprecated_html5lib: bool,
108-
) -> None:
100+
def test_finder_detects_latest_already_satisfied_pypi_links() -> None:
109101
"""Test PackageFinder detects latest already satisfied using pypi links"""
110102
req = install_req_from_line("initools", None)
111103
# the latest initools on PyPI is 0.3.1
@@ -115,10 +107,7 @@ def test_finder_detects_latest_already_satisfied_pypi_links(
115107
version=parse_version(latest_version),
116108
)
117109
req.satisfied_by = satisfied_by
118-
finder = make_test_finder(
119-
index_urls=["http://pypi.org/simple/"],
120-
use_deprecated_html5lib=use_deprecated_html5lib,
121-
)
110+
finder = make_test_finder(index_urls=["http://pypi.org/simple/"])
122111

123112
with pytest.raises(BestVersionAlreadyInstalled):
124113
finder.find_requirement(req, True)

0 commit comments

Comments
 (0)