From 1c73fed0f8f8775bd3645c8243b4d6ac5526b3d2 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Mon, 9 Mar 2026 14:50:35 +0900
Subject: [PATCH 1/3] fix(pypi): return yank reason from SimpleAPI HTML
This makes the logic in the parser a little bit more sophisticated, but
we also start handling the yank reason. This fixes the issue where the
`data-yank` presence but no value would be interpreted as a yanked
package. With this it should start working.
This implementation assumes that we have HTML escaped sequences as tag
values. It also unescapes them when returning the strings.
The posibilities that it gives us are:
- Use the `data-requires-python` to potentially discard any Python
packages that are unsupported in the `select_whl` function.
Work towards #260.
Work towards #2731.
---
python/private/pypi/parse_requirements.bzl | 10 +-
python/private/pypi/parse_simpleapi_html.bzl | 198 +++++++++++++-----
tests/pypi/hub_builder/hub_builder_tests.bzl | 8 +-
.../parse_requirements_tests.bzl | 66 +++---
.../parse_simpleapi_html_tests.bzl | 58 ++++-
5 files changed, 236 insertions(+), 104 deletions(-)
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index acc35b3208..78b6662d08 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -267,7 +267,7 @@ def _package_srcs(
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
)
req_line = r.srcs.requirement_line
else:
@@ -379,7 +379,7 @@ def _add_dists(*, requirement, index_urls, target_platform, logger = None):
url = requirement.srcs.url,
filename = requirement.srcs.filename,
sha256 = requirement.srcs.shas[0] if requirement.srcs.shas else "",
- yanked = False,
+ yanked = None,
)
return dist, False
@@ -403,12 +403,12 @@ def _add_dists(*, requirement, index_urls, target_platform, logger = None):
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
maybe_whl = index_urls.whls.get(sha256)
- if maybe_whl and not maybe_whl.yanked:
+ if maybe_whl and maybe_whl.yanked == None:
whls.append(maybe_whl)
continue
maybe_sdist = index_urls.sdists.get(sha256)
- if maybe_sdist and not maybe_sdist.yanked:
+ if maybe_sdist and maybe_sdist.yanked == None:
sdist = maybe_sdist
continue
@@ -416,7 +416,7 @@ def _add_dists(*, requirement, index_urls, target_platform, logger = None):
yanked = {}
for dist in whls + [sdist]:
- if dist and dist.yanked:
+ if dist and dist.yanked != None:
yanked.setdefault(dist.yanked, []).append(dist.filename)
if yanked:
logger.warn(lambda: "\n".join([
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index 6778d3da16..563130791e 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -26,81 +26,177 @@ def parse_simpleapi_html(*, content):
Returns:
A list of structs with:
- * filename: The filename of the artifact.
- * version: The version of the artifact.
- * url: The URL to download the artifact.
- * sha256: The sha256 of the artifact.
- * metadata_sha256: The whl METADATA sha256 if we can download it. If this is
- present, then the 'metadata_url' is also present. Defaults to "".
- * metadata_url: The URL for the METADATA if we can download it. Defaults to "".
+ * filename: {type}`str` The filename of the artifact.
+ * version: {type}`str` The version of the artifact.
+ * url: {type}`str` The URL to download the artifact.
+ * sha256: {type}`str` The sha256 of the artifact.
+ * metadata_sha256: {type}`str` The whl METADATA sha256 if we can download it. If this is
+ present, then the 'metadata_url' is also present. Defaults to "".
+ * metadata_url: {type}`str` The URL for the METADATA if we can download it. Defaults to "".
+ * yanked: {type}`str | None` the yank reason if the package is yanked. If it is not yanked,
+ then it will be `None`. An empty string yank reason means that the package is yanked but
+ the reason is not provided.
"""
sdists = {}
whls = {}
- lines = content.split("= (2, 0):
# We don't expect to have version 2.0 here, but have this check in place just in case.
# https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api
fail("Unsupported API version: {}".format(api_version))
- # Each line follows the following pattern
- # filename
- sha256s_by_version = {}
- for line in lines[1:]:
- dist_url, _, tail = line.partition("#sha256=")
+ # 2. Iterate using find() to avoid huge list allocations from .split("
+ tag_end = content.find(">", start_tag)
+ end_tag = content.find("", tag_end)
+ if tag_end == -1 or end_tag == -1:
+ break
+
+ # Extract only the necessary slices
+ attr_part = content[start_tag + 3:tag_end]
+ filename = content[tag_end + 1:end_tag].strip()
+
+ # Update cursor for next iteration
+ cursor = end_tag + 4
+
+ # 3. Efficient Attribute Parsing
+ attrs = _parse_attrs(attr_part)
+ href = attrs.get("href", "")
+ if not href:
+ continue
- sha256, _, tail = tail.partition("\"")
+ dist_url, _, sha256 = href.partition("#sha256=")
- # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
- yanked = "data-yanked" in line
+ # Handle Yanked status
+ yanked = None
+ if "data-yanked" in attrs:
+ yanked = _unescape_pypi_html(attrs["data-yanked"])
- head, _, _ = tail.rpartition("")
- maybe_metadata, _, filename = head.rpartition(">")
version = version_from_filename(filename)
sha256s_by_version.setdefault(version, []).append(sha256)
+ # 4. Optimized Metadata Check (PEP 714)
metadata_sha256 = ""
metadata_url = ""
- for metadata_marker in ["data-core-metadata", "data-dist-info-metadata"]:
- metadata_marker = metadata_marker + "=\"sha256="
- if metadata_marker in maybe_metadata:
- # Implement https://peps.python.org/pep-0714/
- _, _, tail = maybe_metadata.partition(metadata_marker)
- metadata_sha256, _, _ = tail.partition("\"")
- metadata_url = dist_url + ".metadata"
- break
+
+ # Dist-info is more common in modern PyPI
+ m_val = attrs.get("data-dist-info-metadata") or attrs.get("data-core-metadata")
+ if m_val and m_val != "false":
+ _, _, metadata_sha256 = m_val.partition("sha256=")
+ metadata_url = dist_url + ".metadata"
+
+ # 5. Result object
+ dist = struct(
+ filename = filename,
+ version = version,
+ url = dist_url,
+ sha256 = sha256,
+ metadata_sha256 = metadata_sha256,
+ metadata_url = metadata_url,
+ yanked = yanked,
+ )
if filename.endswith(".whl"):
- whls[sha256] = struct(
- filename = filename,
- version = version,
- url = dist_url,
- sha256 = sha256,
- metadata_sha256 = metadata_sha256,
- metadata_url = metadata_url,
- yanked = yanked,
- )
+ whls[sha256] = dist
else:
- sdists[sha256] = struct(
- filename = filename,
- version = version,
- url = dist_url,
- sha256 = sha256,
- metadata_sha256 = "",
- metadata_url = "",
- yanked = yanked,
- )
+ sdists[sha256] = dist
return struct(
sdists = sdists,
whls = whls,
sha256s_by_version = sha256s_by_version,
)
+
+def _parse_attrs(attr_string):
+ """Parses attributes from a pre-sliced string."""
+ attrs = {}
+ parts = attr_string.split('"')
+
+ for i in range(0, len(parts) - 1, 2):
+ raw_key = parts[i].strip()
+ if not raw_key:
+ continue
+
+ key_parts = raw_key.split(" ")
+ current_key = key_parts[-1].rstrip("=")
+
+ # Batch handle booleans
+ for j in range(len(key_parts) - 1):
+ b = key_parts[j].strip()
+ if b:
+ attrs[b] = ""
+
+ attrs[current_key] = parts[i + 1]
+
+ # Final trailing boolean check
+ last = parts[-1].strip()
+ if last:
+ for b in last.split(" "):
+ if b:
+ attrs[b] = ""
+ return attrs
+
+def _unescape_pypi_html(text):
+ """Unescape HTML text.
+
+ Decodes standard HTML entities used in the Simple API.
+ Specifically targets characters used in URLs and attribute values.
+
+ Args:
+ text: {type}`str` The text to replace.
+
+ Returns:
+ A string with unescaped characters
+ """
+
+ # 1. Short circuit for the most common case
+ if not text or "&" not in text:
+ return text
+
+ # 2. Check for the most frequent PEP 503 entities first (version constraints).
+ # Re-ordering based on frequency reduces unnecessary checks for rare entities.
+ if ">" in text:
+ text = text.replace(">", ">")
+ if "<" in text:
+ text = text.replace("<", "<")
+
+ # 3. Grouped check for numeric entities.
+ # If '' isn't there, we skip 4 distinct string scans.
+ if "" in text:
+ if "'" in text:
+ text = text.replace("'", "'")
+ if "'" in text:
+ text = text.replace("'", "'")
+ if "
" in text:
+ text = text.replace("
", "\n")
+ if "
" in text:
+ text = text.replace("
", "\r")
+
+ if """ in text:
+ text = text.replace(""", '"')
+
+ # 4. Handle ampersands last to prevent double-decoding.
+ if "&" in text:
+ text = text.replace("&", "&")
+
+ return text
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index 27040d36d7..170e12c4e4 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -777,7 +777,7 @@ def _test_simple_get_index(env):
"plat_pkg": struct(
whls = {
"deadb44f": struct(
- yanked = False,
+ yanked = None,
filename = "plat-pkg-0.0.4-py3-none-linux_x86_64.whl",
sha256 = "deadb44f",
url = "example2.org/index/plat_pkg/",
@@ -792,7 +792,7 @@ def _test_simple_get_index(env):
"simple": struct(
whls = {
"deadb00f": struct(
- yanked = False,
+ yanked = None,
filename = "simple-0.0.1-py3-none-any.whl",
sha256 = "deadb00f",
url = "example2.org",
@@ -800,7 +800,7 @@ def _test_simple_get_index(env):
},
sdists = {
"deadbeef": struct(
- yanked = False,
+ yanked = None,
filename = "simple-0.0.1.tar.gz",
sha256 = "deadbeef",
url = "example.org",
@@ -811,7 +811,7 @@ def _test_simple_get_index(env):
"some_other_pkg": struct(
whls = {
"deadb33f": struct(
- yanked = False,
+ yanked = None,
filename = "some-other-pkg-0.0.1-py3-none-any.whl",
sha256 = "deadb33f",
url = "example2.org/index/some_other_pkg/",
diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
index 0d03e94467..bea8ac5f78 100644
--- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl
+++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
@@ -143,7 +143,7 @@ def _test_simple(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -174,7 +174,7 @@ def _test_direct_urls_integration(env):
sha256 = "",
target_platforms = ["osx_x86_64"],
url = "https://github.com/org/foo/downloads/foo-1.1.tar.gz",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -184,7 +184,7 @@ def _test_direct_urls_integration(env):
sha256 = "",
target_platforms = ["linux_x86_64"],
url = "https://some-url/package.whl",
- yanked = False,
+ yanked = None,
),
],
),
@@ -216,7 +216,7 @@ def _test_direct_urls_no_extract(env):
sha256 = "",
target_platforms = ["osx_x86_64"],
url = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -226,7 +226,7 @@ def _test_direct_urls_no_extract(env):
sha256 = "",
target_platforms = ["linux_x86_64"],
url = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -258,7 +258,7 @@ def _test_extra_pip_args(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -287,7 +287,7 @@ def _test_dupe_requirements(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -318,7 +318,7 @@ def _test_multi_os(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -336,7 +336,7 @@ def _test_multi_os(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -346,7 +346,7 @@ def _test_multi_os(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -383,7 +383,7 @@ def _test_multi_os_legacy(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -401,7 +401,7 @@ def _test_multi_os_legacy(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -411,7 +411,7 @@ def _test_multi_os_legacy(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -464,7 +464,7 @@ def _test_env_marker_resolution(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -482,7 +482,7 @@ def _test_env_marker_resolution(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -512,7 +512,7 @@ def _test_different_package_version(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -522,7 +522,7 @@ def _test_different_package_version(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -552,7 +552,7 @@ def _test_different_package_extras(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -562,7 +562,7 @@ def _test_different_package_extras(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -591,7 +591,7 @@ def _test_optional_hash(env):
url = "https://example.org/bar-0.0.4.whl",
filename = "bar-0.0.4.whl",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -609,7 +609,7 @@ def _test_optional_hash(env):
url = "https://example.org/foo-0.0.5.whl",
filename = "foo-0.0.5.whl",
sha256 = "deadbeef",
- yanked = False,
+ yanked = None,
),
],
),
@@ -638,7 +638,7 @@ def _test_git_sources(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -680,7 +680,7 @@ def _test_overlapping_shas_with_index_results(env):
url = "sdist",
sha256 = "5d15t",
filename = "foo-0.0.1.tar.gz",
- yanked = False,
+ yanked = None,
),
},
whls = {
@@ -688,13 +688,13 @@ def _test_overlapping_shas_with_index_results(env):
url = "super2",
sha256 = "deadb11f",
filename = "foo-0.0.1-py3-none-macosx_14_0_x86_64.whl",
- yanked = False,
+ yanked = None,
),
"deadbaaf": struct(
url = "super2",
sha256 = "deadbaaf",
filename = "foo-0.0.1-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
},
),
@@ -716,7 +716,7 @@ def _test_overlapping_shas_with_index_results(env):
sha256 = "deadbaaf",
target_platforms = ["cp39_linux_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -726,7 +726,7 @@ def _test_overlapping_shas_with_index_results(env):
sha256 = "deadb11f",
target_platforms = ["cp39_osx_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
],
),
@@ -771,13 +771,13 @@ def _test_get_index_urls_different_versions(env):
url = "super2",
sha256 = "deadb11f",
filename = "foo-0.0.2-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
"deadbaaf": struct(
url = "super2",
sha256 = "deadbaaf",
filename = "foo-0.0.1-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
},
),
@@ -810,7 +810,7 @@ def _test_get_index_urls_different_versions(env):
sha256 = "",
target_platforms = ["cp39_linux_x86_64"],
url = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -820,7 +820,7 @@ def _test_get_index_urls_different_versions(env):
sha256 = "deadb11f",
target_platforms = ["cp310_linux_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
],
),
@@ -855,7 +855,7 @@ def _test_get_index_urls_single_py_version(env):
url = "super2",
sha256 = "deadb11f",
filename = "foo-0.0.2-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
},
),
@@ -885,7 +885,7 @@ def _test_get_index_urls_single_py_version(env):
sha256 = "deadb11f",
target_platforms = ["cp310_linux_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
],
),
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index f33ba05c91..f72d61371c 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -57,7 +57,7 @@ def _test_sdist(env):
filename = "foo-0.0.1.tar.gz",
sha256 = "deadbeefasource",
url = "https://example.org/full-url/foo-0.0.1.tar.gz",
- yanked = False,
+ yanked = None,
version = "0.0.1",
),
),
@@ -65,7 +65,25 @@ def _test_sdist(env):
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python=">=3.7"',
+ "data-yanked",
+ ],
+ filename = "foo-0.0.1.tar.gz",
+ ),
+ struct(
+ filename = "foo-0.0.1.tar.gz",
+ sha256 = "deadbeefasource",
+ url = "https://example.org/full-url/foo-0.0.1.tar.gz",
+ version = "0.0.1",
+ yanked = "",
+ ),
+ ),
+ (
+ struct(
+ attrs = [
+ 'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
+ 'data-requires-python=">=3.7"',
+ "data-yanked=\"Something
with "quotes"
over two lines\"",
],
filename = "foo-0.0.1.tar.gz",
),
@@ -74,7 +92,25 @@ def _test_sdist(env):
sha256 = "deadbeefasource",
url = "https://example.org/full-url/foo-0.0.1.tar.gz",
version = "0.0.1",
- yanked = False,
+ # NOTE @aignas 2026-03-09: we preserve the white space
+ yanked = "Something \nwith \"quotes\"\nover two lines",
+ ),
+ ),
+ (
+ struct(
+ attrs = [
+ 'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
+ 'data-requires-python=">=3.7"',
+ 'data-yanked=""',
+ ],
+ filename = "foo-0.0.1.tar.gz",
+ ),
+ struct(
+ filename = "foo-0.0.1.tar.gz",
+ sha256 = "deadbeefasource",
+ url = "https://example.org/full-url/foo-0.0.1.tar.gz",
+ version = "0.0.1",
+ yanked = "",
),
),
]
@@ -94,7 +130,7 @@ def _test_sdist(env):
filename = subjects.str,
sha256 = subjects.str,
url = subjects.str,
- yanked = subjects.bool,
+ yanked = subjects.str,
version = subjects.str,
),
)
@@ -126,14 +162,14 @@ def _test_whls(env):
sha256 = "deadbeef",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
version = "0.0.2",
- yanked = False,
+ yanked = None,
),
),
(
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python=">=3.7"',
'data-dist-info-metadata="sha256=deadb00f"',
'data-core-metadata="sha256=deadb00f"',
],
@@ -146,7 +182,7 @@ def _test_whls(env):
sha256 = "deadbeef",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
version = "0.0.2",
- yanked = False,
+ yanked = None,
),
),
(
@@ -165,7 +201,7 @@ def _test_whls(env):
sha256 = "deadbeef",
version = "0.0.2",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- yanked = False,
+ yanked = None,
),
),
(
@@ -184,7 +220,7 @@ def _test_whls(env):
sha256 = "deadbeef",
version = "0.0.2",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- yanked = False,
+ yanked = None,
),
),
(
@@ -202,7 +238,7 @@ def _test_whls(env):
sha256 = "deadbeef",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
version = "0.0.2",
- yanked = False,
+ yanked = None,
),
),
]
@@ -223,7 +259,7 @@ def _test_whls(env):
metadata_url = subjects.str,
sha256 = subjects.str,
url = subjects.str,
- yanked = subjects.bool,
+ yanked = subjects.str,
version = subjects.str,
),
)
From 9dd238cbbc5e6f00ccebf19459ffb2f710934927 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Tue, 10 Mar 2026 20:05:54 +0900
Subject: [PATCH 2/3] wip
---
python/private/pypi/parse_simpleapi_html.bzl | 23 +++-
python/private/pypi/simpleapi_download.bzl | 123 +++++++++++++------
2 files changed, 106 insertions(+), 40 deletions(-)
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index 563130791e..fc31fdb9e0 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -18,14 +18,17 @@ Parse SimpleAPI HTML in Starlark.
load(":version_from_filename.bzl", "version_from_filename")
-def parse_simpleapi_html(*, content):
+def parse_simpleapi_html(*, content, parse_index = False):
"""Get the package URLs for given shas by parsing the Simple API HTML.
Args:
- content(str): The Simple API HTML content.
+ content: {type}`str` The Simple API HTML content.
+ parse_index: {type}`bool` whether to parse the content as the index page of the PyPI index,
+ e.g. the `https://pypi.org/simple/`. This only has the URLs for the individual package.
Returns:
- A list of structs with:
+ If it is the index page, return the map of package to URL it can be queried from.
+ Otherwise, a list of structs with:
* filename: {type}`str` The filename of the artifact.
* version: {type}`str` The version of the artifact.
* url: {type}`str` The URL to download the artifact.
@@ -59,6 +62,8 @@ def parse_simpleapi_html(*, content):
# https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api
fail("Unsupported API version: {}".format(api_version))
+ packages = {}
+
# 2. Iterate using find() to avoid huge list allocations from .split("
Date: Tue, 10 Mar 2026 20:08:21 +0900
Subject: [PATCH 3/3] wip
---
python/private/pypi/simpleapi_download.bzl | 1 +
1 file changed, 1 insertion(+)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index befe551ccc..cbef724670 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -197,6 +197,7 @@ def _get_index_overrides(ctx, index_urls, index_url_overrides, read_simpleapi, *
)
if hasattr(download, "wait"):
# TODO @aignas 2026-03-10: do parallel download if possible
+ # for all indexes at once
result = download.wait()
else:
result = download