Skip to content

Commit bad03ef

Browse files
Use data-dist-info-metadata (PEP 658) to decouple resolution from downloading (#11111)
Co-authored-by: Tzu-ping Chung <uranusjr@gmail.com>
1 parent a66406a commit bad03ef

File tree

14 files changed: +834 -180 lines changed

news/11111.feature.rst

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata without downloading the dist yet.

src/pip/_internal/exceptions.py

+5-6
Original file line number | Diff line number | Diff line change
@@ -335,8 +335,8 @@ class MetadataInconsistent(InstallationError):
335335
"""Built metadata contains inconsistent information.
336336
337337
This is raised when the metadata contains values (e.g. name and version)
338-
that do not match the information previously obtained from sdist filename
339-
or user-supplied ``#egg=`` value.
338+
that do not match the information previously obtained from sdist filename,
339+
user-supplied ``#egg=`` value, or an install requirement name.
340340
"""
341341

342342
def __init__(
@@ -348,11 +348,10 @@ def __init__(
348348
self.m_val = m_val
349349

350350
def __str__(self) -> str:
351-
template = (
352-
"Requested {} has inconsistent {}: "
353-
"filename has {!r}, but metadata has {!r}"
351+
return (
352+
f"Requested {self.ireq} has inconsistent {self.field}: "
353+
f"expected {self.f_val!r}, but metadata has {self.m_val!r}"
354354
)
355-
return template.format(self.ireq, self.field, self.f_val, self.m_val)
356355

357356

358357
class LegacyInstallFailure(DiagnosticPipError):

src/pip/_internal/index/collector.py

+5-115
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,8 @@
99
import json
1010
import logging
1111
import os
12-
import re
1312
import urllib.parse
1413
import urllib.request
15-
import xml.etree.ElementTree
1614
from html.parser import HTMLParser
1715
from optparse import Values
1816
from typing import (
@@ -39,7 +37,7 @@
3937
from pip._internal.network.session import PipSession
4038
from pip._internal.network.utils import raise_for_status
4139
from pip._internal.utils.filetypes import is_archive_file
42-
from pip._internal.utils.misc import pairwise, redact_auth_from_url
40+
from pip._internal.utils.misc import redact_auth_from_url
4341
from pip._internal.vcs import vcs
4442

4543
from .sources import CandidatesFromPage, LinkSource, build_source
@@ -51,7 +49,6 @@
5149

5250
logger = logging.getLogger(__name__)
5351

54-
HTMLElement = xml.etree.ElementTree.Element
5552
ResponseHeaders = MutableMapping[str, str]
5653

5754

@@ -191,94 +188,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
191188
return None
192189

193190

194-
def _clean_url_path_part(part: str) -> str:
195-
"""
196-
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
197-
"""
198-
# We unquote prior to quoting to make sure nothing is double quoted.
199-
return urllib.parse.quote(urllib.parse.unquote(part))
200-
201-
202-
def _clean_file_url_path(part: str) -> str:
203-
"""
204-
Clean the first part of a URL path that corresponds to a local
205-
filesystem path (i.e. the first part after splitting on "@" characters).
206-
"""
207-
# We unquote prior to quoting to make sure nothing is double quoted.
208-
# Also, on Windows the path part might contain a drive letter which
209-
# should not be quoted. On Linux where drive letters do not
210-
# exist, the colon should be quoted. We rely on urllib.request
211-
# to do the right thing here.
212-
return urllib.request.pathname2url(urllib.request.url2pathname(part))
213-
214-
215-
# percent-encoded: /
216-
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
217-
218-
219-
def _clean_url_path(path: str, is_local_path: bool) -> str:
220-
"""
221-
Clean the path portion of a URL.
222-
"""
223-
if is_local_path:
224-
clean_func = _clean_file_url_path
225-
else:
226-
clean_func = _clean_url_path_part
227-
228-
# Split on the reserved characters prior to cleaning so that
229-
# revision strings in VCS URLs are properly preserved.
230-
parts = _reserved_chars_re.split(path)
231-
232-
cleaned_parts = []
233-
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
234-
cleaned_parts.append(clean_func(to_clean))
235-
# Normalize %xx escapes (e.g. %2f -> %2F)
236-
cleaned_parts.append(reserved.upper())
237-
238-
return "".join(cleaned_parts)
239-
240-
241-
def _clean_link(url: str) -> str:
242-
"""
243-
Make sure a link is fully quoted.
244-
For example, if ' ' occurs in the URL, it will be replaced with "%20",
245-
and without double-quoting other characters.
246-
"""
247-
# Split the URL into parts according to the general structure
248-
# `scheme://netloc/path;parameters?query#fragment`.
249-
result = urllib.parse.urlparse(url)
250-
# If the netloc is empty, then the URL refers to a local filesystem path.
251-
is_local_path = not result.netloc
252-
path = _clean_url_path(result.path, is_local_path=is_local_path)
253-
return urllib.parse.urlunparse(result._replace(path=path))
254-
255-
256-
def _create_link_from_element(
257-
element_attribs: Dict[str, Optional[str]],
258-
page_url: str,
259-
base_url: str,
260-
) -> Optional[Link]:
261-
"""
262-
Convert an anchor element's attributes in a simple repository page to a Link.
263-
"""
264-
href = element_attribs.get("href")
265-
if not href:
266-
return None
267-
268-
url = _clean_link(urllib.parse.urljoin(base_url, href))
269-
pyrequire = element_attribs.get("data-requires-python")
270-
yanked_reason = element_attribs.get("data-yanked")
271-
272-
link = Link(
273-
url,
274-
comes_from=page_url,
275-
requires_python=pyrequire,
276-
yanked_reason=yanked_reason,
277-
)
278-
279-
return link
280-
281-
282191
class CacheablePageContent:
283192
def __init__(self, page: "IndexContent") -> None:
284193
assert page.cache_link_parsing
@@ -326,25 +235,10 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
326235
if content_type_l.startswith("application/vnd.pypi.simple.v1+json"):
327236
data = json.loads(page.content)
328237
for file in data.get("files", []):
329-
file_url = file.get("url")
330-
if file_url is None:
238+
link = Link.from_json(file, page.url)
239+
if link is None:
331240
continue
332-
333-
# The Link.yanked_reason expects an empty string instead of a boolean.
334-
yanked_reason = file.get("yanked")
335-
if yanked_reason and not isinstance(yanked_reason, str):
336-
yanked_reason = ""
337-
# The Link.yanked_reason expects None instead of False
338-
elif not yanked_reason:
339-
yanked_reason = None
340-
341-
yield Link(
342-
_clean_link(urllib.parse.urljoin(page.url, file_url)),
343-
comes_from=page.url,
344-
requires_python=file.get("requires-python"),
345-
yanked_reason=yanked_reason,
346-
hashes=file.get("hashes", {}),
347-
)
241+
yield link
348242
return
349243

350244
parser = HTMLLinkParser(page.url)
@@ -354,11 +248,7 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
354248
url = page.url
355249
base_url = parser.base_url or url
356250
for anchor in parser.anchors:
357-
link = _create_link_from_element(
358-
anchor,
359-
page_url=url,
360-
base_url=base_url,
361-
)
251+
link = Link.from_element(anchor, page_url=url, base_url=base_url)
362252
if link is None:
363253
continue
364254
yield link

src/pip/_internal/metadata/__init__.py

+22
Original file line number | Diff line number | Diff line change
@@ -103,3 +103,25 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio
103103
:param canonical_name: Normalized project name of the given wheel.
104104
"""
105105
return select_backend().Distribution.from_wheel(wheel, canonical_name)
106+
107+
108+
def get_metadata_distribution(
109+
metadata_contents: bytes,
110+
filename: str,
111+
canonical_name: str,
112+
) -> BaseDistribution:
113+
"""Get the dist representation of the specified METADATA file contents.
114+
115+
This returns a Distribution instance from the chosen backend sourced from the data
116+
in `metadata_contents`.
117+
118+
:param metadata_contents: Contents of a METADATA file within a dist, or one served
119+
via PEP 658.
120+
:param filename: Filename for the dist this metadata represents.
121+
:param canonical_name: Normalized project name of the given dist.
122+
"""
123+
return select_backend().Distribution.from_metadata_file_contents(
124+
metadata_contents,
125+
filename,
126+
canonical_name,
127+
)

src/pip/_internal/metadata/base.py

+18
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,24 @@ def from_directory(cls, directory: str) -> "BaseDistribution":
113113
"""
114114
raise NotImplementedError()
115115

116+
@classmethod
117+
def from_metadata_file_contents(
118+
cls,
119+
metadata_contents: bytes,
120+
filename: str,
121+
project_name: str,
122+
) -> "BaseDistribution":
123+
"""Load the distribution from the contents of a METADATA file.
124+
125+
This is used to implement PEP 658 by generating a "shallow" dist object that can
126+
be used for resolution without downloading or building the actual dist yet.
127+
128+
:param metadata_contents: The contents of a METADATA file.
129+
:param filename: File name for the dist with this metadata.
130+
:param project_name: Name of the project this dist represents.
131+
"""
132+
raise NotImplementedError()
133+
116134
@classmethod
117135
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
118136
"""Load the distribution from a given wheel.

src/pip/_internal/metadata/importlib/_dists.py

+18
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
)
2929
from pip._internal.utils.misc import normalize_path
3030
from pip._internal.utils.packaging import safe_extra
31+
from pip._internal.utils.temp_dir import TempDirectory
3132
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
3233

3334
from ._compat import BasePath, get_dist_name
@@ -109,6 +110,23 @@ def from_directory(cls, directory: str) -> BaseDistribution:
109110
dist = importlib.metadata.Distribution.at(info_location)
110111
return cls(dist, info_location, info_location.parent)
111112

113+
@classmethod
114+
def from_metadata_file_contents(
115+
cls,
116+
metadata_contents: bytes,
117+
filename: str,
118+
project_name: str,
119+
) -> BaseDistribution:
120+
# Generate temp dir to contain the metadata file, and write the file contents.
121+
temp_dir = pathlib.Path(
122+
TempDirectory(kind="metadata", globally_managed=True).path
123+
)
124+
metadata_path = temp_dir / "METADATA"
125+
metadata_path.write_bytes(metadata_contents)
126+
# Construct dist pointing to the newly created directory.
127+
dist = importlib.metadata.Distribution.at(metadata_path.parent)
128+
return cls(dist, metadata_path.parent, None)
129+
112130
@classmethod
113131
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
114132
try:

src/pip/_internal/metadata/pkg_resources.py

+20-3
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ class EntryPoint(NamedTuple):
3333
group: str
3434

3535

36-
class WheelMetadata:
36+
class InMemoryMetadata:
3737
"""IMetadataProvider that reads metadata files from a dictionary.
3838
3939
This also maps metadata decoding exceptions to our internal exception type.
@@ -92,12 +92,29 @@ def from_directory(cls, directory: str) -> BaseDistribution:
9292
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
9393
return cls(dist)
9494

95+
@classmethod
96+
def from_metadata_file_contents(
97+
cls,
98+
metadata_contents: bytes,
99+
filename: str,
100+
project_name: str,
101+
) -> BaseDistribution:
102+
metadata_dict = {
103+
"METADATA": metadata_contents,
104+
}
105+
dist = pkg_resources.DistInfoDistribution(
106+
location=filename,
107+
metadata=InMemoryMetadata(metadata_dict, filename),
108+
project_name=project_name,
109+
)
110+
return cls(dist)
111+
95112
@classmethod
96113
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
97114
try:
98115
with wheel.as_zipfile() as zf:
99116
info_dir, _ = parse_wheel(zf, name)
100-
metadata_text = {
117+
metadata_dict = {
101118
path.split("/", 1)[-1]: read_wheel_metadata_file(zf, path)
102119
for path in zf.namelist()
103120
if path.startswith(f"{info_dir}/")
@@ -108,7 +125,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
108125
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
109126
dist = pkg_resources.DistInfoDistribution(
110127
location=wheel.location,
111-
metadata=WheelMetadata(metadata_text, wheel.location),
128+
metadata=InMemoryMetadata(metadata_dict, wheel.location),
112129
project_name=name,
113130
)
114131
return cls(dist)

0 commit comments

Comments
 (0)