diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index acc35b3208..78b6662d08 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -267,7 +267,7 @@ def _package_srcs(
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
)
req_line = r.srcs.requirement_line
else:
@@ -379,7 +379,7 @@ def _add_dists(*, requirement, index_urls, target_platform, logger = None):
url = requirement.srcs.url,
filename = requirement.srcs.filename,
sha256 = requirement.srcs.shas[0] if requirement.srcs.shas else "",
- yanked = False,
+ yanked = None,
)
return dist, False
@@ -403,12 +403,12 @@ def _add_dists(*, requirement, index_urls, target_platform, logger = None):
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
maybe_whl = index_urls.whls.get(sha256)
- if maybe_whl and not maybe_whl.yanked:
+ if maybe_whl and maybe_whl.yanked == None:
whls.append(maybe_whl)
continue
maybe_sdist = index_urls.sdists.get(sha256)
- if maybe_sdist and not maybe_sdist.yanked:
+ if maybe_sdist and maybe_sdist.yanked == None:
sdist = maybe_sdist
continue
@@ -416,7 +416,7 @@ def _add_dists(*, requirement, index_urls, target_platform, logger = None):
yanked = {}
for dist in whls + [sdist]:
- if dist and dist.yanked:
+ if dist and dist.yanked != None:
yanked.setdefault(dist.yanked, []).append(dist.filename)
if yanked:
logger.warn(lambda: "\n".join([
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index 6778d3da16..fc31fdb9e0 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -18,89 +18,198 @@ Parse SimpleAPI HTML in Starlark.
load(":version_from_filename.bzl", "version_from_filename")
-def parse_simpleapi_html(*, content):
+def parse_simpleapi_html(*, content, parse_index = False):
"""Get the package URLs for given shas by parsing the Simple API HTML.
Args:
- content(str): The Simple API HTML content.
+ content: {type}`str` The Simple API HTML content.
+ parse_index: {type}`bool` whether to parse the content as the index page of the PyPI index,
+ e.g. the `https://pypi.org/simple/`. This only has the URLs for the individual package.
Returns:
- A list of structs with:
- * filename: The filename of the artifact.
- * version: The version of the artifact.
- * url: The URL to download the artifact.
- * sha256: The sha256 of the artifact.
- * metadata_sha256: The whl METADATA sha256 if we can download it. If this is
- present, then the 'metadata_url' is also present. Defaults to "".
- * metadata_url: The URL for the METADATA if we can download it. Defaults to "".
+ If it is the index page, return the map of package to URL it can be queried from.
+ Otherwise, a list of structs with:
+ * filename: {type}`str` The filename of the artifact.
+ * version: {type}`str` The version of the artifact.
+ * url: {type}`str` The URL to download the artifact.
+ * sha256: {type}`str` The sha256 of the artifact.
+ * metadata_sha256: {type}`str` The whl METADATA sha256 if we can download it. If this is
+ present, then the 'metadata_url' is also present. Defaults to "".
+ * metadata_url: {type}`str` The URL for the METADATA if we can download it. Defaults to "".
+ * yanked: {type}`str | None` the yank reason if the package is yanked. If it is not yanked,
+ then it will be `None`. An empty string yank reason means that the package is yanked but
+ the reason is not provided.
"""
sdists = {}
whls = {}
- lines = content.split("= (2, 0):
# We don't expect to have version 2.0 here, but have this check in place just in case.
# https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api
fail("Unsupported API version: {}".format(api_version))
- # Each line follows the following pattern
- # filename
- sha256s_by_version = {}
- for line in lines[1:]:
- dist_url, _, tail = line.partition("#sha256=")
+ packages = {}
+
+ # 2. Iterate using find() to avoid huge list allocations from .split("
+ tag_end = content.find(">", start_tag)
+ end_tag = content.find("", tag_end)
+ if tag_end == -1 or end_tag == -1:
+ break
+
+ # Extract only the necessary slices
+ filename = content[tag_end + 1:end_tag].strip()
+ attr_part = content[start_tag + 3:tag_end]
+
+ # Update cursor for next iteration
+ cursor = end_tag + 4
+
+ attrs = _parse_attrs(attr_part)
+ href = attrs.get("href", "")
+ if not href:
+ continue
- sha256, _, tail = tail.partition("\"")
+ if parse_index:
+ pkg_name = filename
+ packages[pkg_name] = href
+ continue
- # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
- yanked = "data-yanked" in line
+ # 3. Efficient Attribute Parsing
+ dist_url, _, sha256 = href.partition("#sha256=")
+
+ # Handle Yanked status
+ yanked = None
+ if "data-yanked" in attrs:
+ yanked = _unescape_pypi_html(attrs["data-yanked"])
- head, _, _ = tail.rpartition("")
- maybe_metadata, _, filename = head.rpartition(">")
version = version_from_filename(filename)
sha256s_by_version.setdefault(version, []).append(sha256)
+ # 4. Optimized Metadata Check (PEP 714)
metadata_sha256 = ""
metadata_url = ""
- for metadata_marker in ["data-core-metadata", "data-dist-info-metadata"]:
- metadata_marker = metadata_marker + "=\"sha256="
- if metadata_marker in maybe_metadata:
- # Implement https://peps.python.org/pep-0714/
- _, _, tail = maybe_metadata.partition(metadata_marker)
- metadata_sha256, _, _ = tail.partition("\"")
- metadata_url = dist_url + ".metadata"
- break
+
+ # Dist-info is more common in modern PyPI
+ m_val = attrs.get("data-dist-info-metadata") or attrs.get("data-core-metadata")
+ if m_val and m_val != "false":
+ _, _, metadata_sha256 = m_val.partition("sha256=")
+ metadata_url = dist_url + ".metadata"
+
+ # 5. Result object
+ dist = struct(
+ filename = filename,
+ version = version,
+ url = dist_url,
+ sha256 = sha256,
+ metadata_sha256 = metadata_sha256,
+ metadata_url = metadata_url,
+ yanked = yanked,
+ )
if filename.endswith(".whl"):
- whls[sha256] = struct(
- filename = filename,
- version = version,
- url = dist_url,
- sha256 = sha256,
- metadata_sha256 = metadata_sha256,
- metadata_url = metadata_url,
- yanked = yanked,
- )
+ whls[sha256] = dist
else:
- sdists[sha256] = struct(
- filename = filename,
- version = version,
- url = dist_url,
- sha256 = sha256,
- metadata_sha256 = "",
- metadata_url = "",
- yanked = yanked,
- )
+ sdists[sha256] = dist
+
+ if packages:
+ return packages
return struct(
sdists = sdists,
whls = whls,
sha256s_by_version = sha256s_by_version,
)
+
+def _parse_attrs(attr_string):
+    """Build a dict of HTML attributes from the inside of an opening tag.
+
+    The text is cut on double quotes, so the chunks alternate between
+    `name=` text (optionally preceded by bare boolean attributes) and the
+    quoted value that belongs to that name.
+
+    Args:
+        attr_string: {type}`str` the attribute text of a single tag.
+
+    Returns:
+        {type}`dict[str, str]` attribute name to the value exactly as written
+        in the tag (no unescaping is done here). Boolean attributes map to an
+        empty string.
+    """
+    result = {}
+    chunks = attr_string.split('"')
+
+    # Even positions hold the `name=` text, odd positions hold the quoted value.
+    for pos in range(0, len(chunks) - 1, 2):
+        head = chunks[pos].strip()
+        if head:
+            tokens = head.split(" ")
+
+            # Everything before the last token is a bare boolean attribute.
+            for token in tokens[:-1]:
+                flag = token.strip()
+                if flag:
+                    result[flag] = ""
+
+            result[tokens[-1].rstrip("=")] = chunks[pos + 1]
+
+    # Whatever follows the last quote can only be boolean attributes.
+    for flag in chunks[-1].strip().split(" "):
+        if flag:
+            result[flag] = ""
+    return result
+
+def _unescape_pypi_html(text):
+    """Unescape HTML text.
+
+    Decodes the handful of HTML entities handled for Simple API attribute
+    values: `&gt;`, `&lt;`, `&#39;`, `&#x27;`, `&#10;`, `&#13;`, `&quot;` and
+    `&amp;`. Any other entity is left as is.
+
+    Args:
+        text: {type}`str` The text to replace.
+
+    Returns:
+        A string with unescaped characters. Falsy input and input without any
+        `&` is returned unchanged.
+    """
+
+    # 1. Short circuit for the most common case
+    if not text or "&" not in text:
+        return text
+
+    # 2. Check for the most frequent PEP 503 entities first (version constraints).
+    # Re-ordering based on frequency reduces unnecessary checks for rare entities.
+    if "&gt;" in text:
+        text = text.replace("&gt;", ">")
+    if "&lt;" in text:
+        text = text.replace("&lt;", "<")
+
+    # 3. Grouped check for numeric entities.
+    # If '&#' isn't there, we skip 4 distinct string scans.
+    if "&#" in text:
+        if "&#39;" in text:
+            text = text.replace("&#39;", "'")
+        if "&#x27;" in text:
+            text = text.replace("&#x27;", "'")
+        if "&#10;" in text:
+            text = text.replace("&#10;", "\n")
+        if "&#13;" in text:
+            text = text.replace("&#13;", "\r")
+
+    if "&quot;" in text:
+        text = text.replace("&quot;", '"')
+
+    # 4. Handle ampersands last to prevent double-decoding: an escaped
+    # ampersand followed by `gt;` must end up as the literal text `&gt;`.
+    if "&amp;" in text:
+        text = text.replace("&amp;", "&")
+
+    return text
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 0f776ad434..cbef724670 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -35,6 +35,11 @@ def simpleapi_download(
_fail = fail):
"""Download Simple API HTML.
+ First it queries all of the indexes for available packages and then it downloads the contents of
+ the per-package URLs and sha256 values. This is to enable us to use bazel_downloader with
+ `requirements.txt` files. As a side effect we also are able to "cross-compile" by fetching the
+ right wheel for the right target platform through the information that we retrieve here.
+
Args:
ctx: The module_ctx or repository_ctx.
attr: Contains the parameters for the download. They are grouped into a
@@ -81,44 +86,51 @@ def simpleapi_download(
index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
+ index_url_overrides = _get_index_overrides(
+ ctx,
+ index_urls,
+ index_url_overrides,
+ read_simpleapi = read_simpleapi,
+ cache = cache,
+ get_auth = get_auth,
+ attr = attr,
+ **download_kwargs
+ )
+
found_on_index = {}
warn_overrides = False
ctx.report_progress("Fetch package lists from PyPI index")
- for i, index_url in enumerate(index_urls):
- if i != 0:
- # Warn the user about a potential fix for the overrides
- warn_overrides = True
- async_downloads = {}
- sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
- for pkg in sources:
- pkg_normalized = normalize_name(pkg)
- url = urllib.strip_empty_path_segments("{index_url}/{distribution}/".format(
- index_url = index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
- distribution = pkg,
- ))
- result = read_simpleapi(
- ctx = ctx,
- attr = attr,
+ index_url = index_urls[0]
+ async_downloads = {}
+ sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
+
+ for pkg in sources:
+ pkg_normalized = normalize_name(pkg)
+ url = urllib.strip_empty_path_segments("{index_url}/{distribution}/".format(
+ index_url = index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
+ distribution = pkg,
+ ))
+ result = read_simpleapi(
+ ctx = ctx,
+ attr = attr,
+ url = url,
+ cache = cache,
+ get_auth = get_auth,
+ **download_kwargs
+ )
+ if hasattr(result, "wait"):
+ # We will process it in a separate loop:
+ async_downloads[pkg] = struct(
+ pkg_normalized = pkg_normalized,
+ wait = result.wait,
url = url,
- cache = cache,
- get_auth = get_auth,
- **download_kwargs
)
- if hasattr(result, "wait"):
- # We will process it in a separate loop:
- async_downloads[pkg] = struct(
- pkg_normalized = pkg_normalized,
- wait = result.wait,
- url = url,
- )
- elif result.success:
- contents[pkg_normalized] = _with_index_url(url, result.output)
- found_on_index[pkg] = index_url
-
- if not async_downloads:
- continue
+ elif result.success:
+ contents[pkg_normalized] = _with_index_url(url, result.output)
+ found_on_index[pkg] = index_url
+ if async_downloads:
# If we use `block` == False, then we need to have a second loop that is
# collecting all of the results as they were being downloaded in parallel.
for pkg, download in async_downloads.items():
@@ -166,7 +178,47 @@ If you would like to skip downloading metadata for these packages please add 'si
return contents
-def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
+def _get_index_overrides(ctx, index_urls, index_url_overrides, read_simpleapi, *, attr, **kwargs):
+    """Work out which packages have to be fetched from a non-default index.
+
+    Explicit overrides win: when `index_url_overrides` is non-empty it is
+    returned untouched and no index page is downloaded. Otherwise the root page
+    of every index is read in order and each package from `attr.sources` is
+    assigned to the first index that lists it.
+
+    Args:
+        ctx: the context object, passed through to `read_simpleapi`.
+        index_urls: {type}`list[str]` the index URLs, the first one being the
+            default index.
+        index_url_overrides: {type}`dict[str, str]` the user supplied overrides.
+        read_simpleapi: the function used to download and parse an index page.
+        attr: the download parameters; `attr.sources` lists the packages.
+        **kwargs: extra arguments forwarded to `read_simpleapi`.
+
+    Returns:
+        {type}`dict[str, str]` the normalized package name to index URL for the
+        packages that were found on an index other than the first one.
+    """
+    if index_url_overrides:
+        return index_url_overrides
+
+    first_index = index_urls[0]
+    found_on_index = {}
+    for index_url in index_urls:
+        download = read_simpleapi(
+            ctx = ctx,
+            attr = attr,
+            url = urllib.strip_empty_path_segments("{index_url}/".format(
+                index_url = index_url,
+            )),
+            parse_index = True,
+            **kwargs
+        )
+        if hasattr(download, "wait"):
+            # TODO @aignas 2026-03-10: do parallel download if possible
+            # for all indexes at once
+            result = download.wait()
+        else:
+            result = download
+
+        if not result.success:
+            fail("Failed to read the list of packages from the index: {}".format(index_url))
+
+        # The first index that lists a package wins, later ones are ignored.
+        available_packages = result.output
+        for pkg in attr.sources:
+            if pkg not in found_on_index and pkg in available_packages:
+                found_on_index[pkg] = index_url
+
+    # The per-package download looks the overrides up by the normalized name,
+    # so the keys have to be normalized here as well.
+    return {
+        normalize_name(pkg): index_url
+        for pkg, index_url in found_on_index.items()
+        if index_url != first_index
+    }
+
+def _read_simpleapi(ctx, url, attr, cache, get_auth = None, parse_index = False, **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -180,6 +232,7 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
{obj}`http_file` for docs.
cache: {type}`struct` the `pypi_cache` instance.
get_auth: A function to get auth information. Used in tests.
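+ parse_index: {type}`bool` whether to parse the downloaded content as the index page that lists the packages instead of a per-package page.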
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
@@ -221,7 +274,6 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
url = [real_url],
output = output,
auth = get_auth(ctx, [real_url], ctx_attr = attr),
- allow_fail = True,
**download_kwargs
)
@@ -234,6 +286,7 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
output = output,
cache = cache,
cache_key = cache_key,
+ parse_index = parse_index,
),
)
@@ -243,15 +296,16 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
output = output,
cache = cache,
cache_key = cache_key,
+ parse_index = parse_index,
)
-def _read_index_result(ctx, *, result, output, cache, cache_key):
+def _read_index_result(ctx, *, result, output, cache, cache_key, parse_index):
if not result.success:
return struct(success = False)
content = ctx.read(output)
- output = parse_simpleapi_html(content = content)
+ output = parse_simpleapi_html(content = content, parse_index = parse_index)
if output:
cache.setdefault(cache_key, output)
return struct(success = True, output = output)
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index 27040d36d7..170e12c4e4 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -777,7 +777,7 @@ def _test_simple_get_index(env):
"plat_pkg": struct(
whls = {
"deadb44f": struct(
- yanked = False,
+ yanked = None,
filename = "plat-pkg-0.0.4-py3-none-linux_x86_64.whl",
sha256 = "deadb44f",
url = "example2.org/index/plat_pkg/",
@@ -792,7 +792,7 @@ def _test_simple_get_index(env):
"simple": struct(
whls = {
"deadb00f": struct(
- yanked = False,
+ yanked = None,
filename = "simple-0.0.1-py3-none-any.whl",
sha256 = "deadb00f",
url = "example2.org",
@@ -800,7 +800,7 @@ def _test_simple_get_index(env):
},
sdists = {
"deadbeef": struct(
- yanked = False,
+ yanked = None,
filename = "simple-0.0.1.tar.gz",
sha256 = "deadbeef",
url = "example.org",
@@ -811,7 +811,7 @@ def _test_simple_get_index(env):
"some_other_pkg": struct(
whls = {
"deadb33f": struct(
- yanked = False,
+ yanked = None,
filename = "some-other-pkg-0.0.1-py3-none-any.whl",
sha256 = "deadb33f",
url = "example2.org/index/some_other_pkg/",
diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
index 0d03e94467..bea8ac5f78 100644
--- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl
+++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
@@ -143,7 +143,7 @@ def _test_simple(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -174,7 +174,7 @@ def _test_direct_urls_integration(env):
sha256 = "",
target_platforms = ["osx_x86_64"],
url = "https://github.com/org/foo/downloads/foo-1.1.tar.gz",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -184,7 +184,7 @@ def _test_direct_urls_integration(env):
sha256 = "",
target_platforms = ["linux_x86_64"],
url = "https://some-url/package.whl",
- yanked = False,
+ yanked = None,
),
],
),
@@ -216,7 +216,7 @@ def _test_direct_urls_no_extract(env):
sha256 = "",
target_platforms = ["osx_x86_64"],
url = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -226,7 +226,7 @@ def _test_direct_urls_no_extract(env):
sha256 = "",
target_platforms = ["linux_x86_64"],
url = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -258,7 +258,7 @@ def _test_extra_pip_args(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -287,7 +287,7 @@ def _test_dupe_requirements(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -318,7 +318,7 @@ def _test_multi_os(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -336,7 +336,7 @@ def _test_multi_os(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -346,7 +346,7 @@ def _test_multi_os(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -383,7 +383,7 @@ def _test_multi_os_legacy(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -401,7 +401,7 @@ def _test_multi_os_legacy(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -411,7 +411,7 @@ def _test_multi_os_legacy(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -464,7 +464,7 @@ def _test_env_marker_resolution(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -482,7 +482,7 @@ def _test_env_marker_resolution(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -512,7 +512,7 @@ def _test_different_package_version(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -522,7 +522,7 @@ def _test_different_package_version(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -552,7 +552,7 @@ def _test_different_package_extras(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -562,7 +562,7 @@ def _test_different_package_extras(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -591,7 +591,7 @@ def _test_optional_hash(env):
url = "https://example.org/bar-0.0.4.whl",
filename = "bar-0.0.4.whl",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -609,7 +609,7 @@ def _test_optional_hash(env):
url = "https://example.org/foo-0.0.5.whl",
filename = "foo-0.0.5.whl",
sha256 = "deadbeef",
- yanked = False,
+ yanked = None,
),
],
),
@@ -638,7 +638,7 @@ def _test_git_sources(env):
url = "",
filename = "",
sha256 = "",
- yanked = False,
+ yanked = None,
),
],
),
@@ -680,7 +680,7 @@ def _test_overlapping_shas_with_index_results(env):
url = "sdist",
sha256 = "5d15t",
filename = "foo-0.0.1.tar.gz",
- yanked = False,
+ yanked = None,
),
},
whls = {
@@ -688,13 +688,13 @@ def _test_overlapping_shas_with_index_results(env):
url = "super2",
sha256 = "deadb11f",
filename = "foo-0.0.1-py3-none-macosx_14_0_x86_64.whl",
- yanked = False,
+ yanked = None,
),
"deadbaaf": struct(
url = "super2",
sha256 = "deadbaaf",
filename = "foo-0.0.1-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
},
),
@@ -716,7 +716,7 @@ def _test_overlapping_shas_with_index_results(env):
sha256 = "deadbaaf",
target_platforms = ["cp39_linux_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -726,7 +726,7 @@ def _test_overlapping_shas_with_index_results(env):
sha256 = "deadb11f",
target_platforms = ["cp39_osx_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
],
),
@@ -771,13 +771,13 @@ def _test_get_index_urls_different_versions(env):
url = "super2",
sha256 = "deadb11f",
filename = "foo-0.0.2-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
"deadbaaf": struct(
url = "super2",
sha256 = "deadbaaf",
filename = "foo-0.0.1-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
},
),
@@ -810,7 +810,7 @@ def _test_get_index_urls_different_versions(env):
sha256 = "",
target_platforms = ["cp39_linux_x86_64"],
url = "",
- yanked = False,
+ yanked = None,
),
struct(
distribution = "foo",
@@ -820,7 +820,7 @@ def _test_get_index_urls_different_versions(env):
sha256 = "deadb11f",
target_platforms = ["cp310_linux_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
],
),
@@ -855,7 +855,7 @@ def _test_get_index_urls_single_py_version(env):
url = "super2",
sha256 = "deadb11f",
filename = "foo-0.0.2-py3-none-any.whl",
- yanked = False,
+ yanked = None,
),
},
),
@@ -885,7 +885,7 @@ def _test_get_index_urls_single_py_version(env):
sha256 = "deadb11f",
target_platforms = ["cp310_linux_x86_64"],
url = "super2",
- yanked = False,
+ yanked = None,
),
],
),
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index f33ba05c91..f72d61371c 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -57,7 +57,7 @@ def _test_sdist(env):
filename = "foo-0.0.1.tar.gz",
sha256 = "deadbeefasource",
url = "https://example.org/full-url/foo-0.0.1.tar.gz",
- yanked = False,
+ yanked = None,
version = "0.0.1",
),
),
@@ -65,7 +65,25 @@ def _test_sdist(env):
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python=">=3.7"',
+ "data-yanked",
+ ],
+ filename = "foo-0.0.1.tar.gz",
+ ),
+ struct(
+ filename = "foo-0.0.1.tar.gz",
+ sha256 = "deadbeefasource",
+ url = "https://example.org/full-url/foo-0.0.1.tar.gz",
+ version = "0.0.1",
+ yanked = "",
+ ),
+ ),
+ (
+ struct(
+ attrs = [
+ 'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
+ 'data-requires-python=">=3.7"',
+ "data-yanked=\"Something
with "quotes"
over two lines\"",
],
filename = "foo-0.0.1.tar.gz",
),
@@ -74,7 +92,25 @@ def _test_sdist(env):
sha256 = "deadbeefasource",
url = "https://example.org/full-url/foo-0.0.1.tar.gz",
version = "0.0.1",
- yanked = False,
+ # NOTE @aignas 2026-03-09: we preserve the white space
+ yanked = "Something \nwith \"quotes\"\nover two lines",
+ ),
+ ),
+ (
+ struct(
+ attrs = [
+ 'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
+ 'data-requires-python=">=3.7"',
+ 'data-yanked=""',
+ ],
+ filename = "foo-0.0.1.tar.gz",
+ ),
+ struct(
+ filename = "foo-0.0.1.tar.gz",
+ sha256 = "deadbeefasource",
+ url = "https://example.org/full-url/foo-0.0.1.tar.gz",
+ version = "0.0.1",
+ yanked = "",
),
),
]
@@ -94,7 +130,7 @@ def _test_sdist(env):
filename = subjects.str,
sha256 = subjects.str,
url = subjects.str,
- yanked = subjects.bool,
+ yanked = subjects.str,
version = subjects.str,
),
)
@@ -126,14 +162,14 @@ def _test_whls(env):
sha256 = "deadbeef",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
version = "0.0.2",
- yanked = False,
+ yanked = None,
),
),
(
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python=">=3.7"',
'data-dist-info-metadata="sha256=deadb00f"',
'data-core-metadata="sha256=deadb00f"',
],
@@ -146,7 +182,7 @@ def _test_whls(env):
sha256 = "deadbeef",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
version = "0.0.2",
- yanked = False,
+ yanked = None,
),
),
(
@@ -165,7 +201,7 @@ def _test_whls(env):
sha256 = "deadbeef",
version = "0.0.2",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- yanked = False,
+ yanked = None,
),
),
(
@@ -184,7 +220,7 @@ def _test_whls(env):
sha256 = "deadbeef",
version = "0.0.2",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- yanked = False,
+ yanked = None,
),
),
(
@@ -202,7 +238,7 @@ def _test_whls(env):
sha256 = "deadbeef",
url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
version = "0.0.2",
- yanked = False,
+ yanked = None,
),
),
]
@@ -223,7 +259,7 @@ def _test_whls(env):
metadata_url = subjects.str,
sha256 = subjects.str,
url = subjects.str,
- yanked = subjects.bool,
+ yanked = subjects.str,
version = subjects.str,
),
)