2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -72,6 +72,8 @@ END_UNRELEASED_TEMPLATE
   {obj}`--stamp` flag.
 * (pypi) Now the RECORD file patches will follow the quoted or unquoted filenames convention
   in order to make `pytorch` and friends easier to patch.
+* (wheel) `py_wheel` no longer expands the input depset during analysis,
+  improving analysis performance for targets with large dependency trees.
 
 {#v0-0-0-fixed}
 ### Fixed
11 changes: 6 additions & 5 deletions python/private/py_wheel.bzl
@@ -344,12 +344,13 @@ def _py_wheel_impl(ctx):
     # Currently this is only the description file (if used).
     other_inputs = []
 
-    # Wrap the inputs into a file to reduce command line length.
+    # Wrap the inputs into a file to reduce command line length, deferring
+    # depset expansion to execution time via Args.add_all with map_each.
     packageinputfile = ctx.actions.declare_file(ctx.attr.name + "_target_wrapped_inputs.txt")
-    content = ""
-    for input_file in inputs_to_package.to_list():
-        content += _input_file_to_arg(input_file) + "\n"
-    ctx.actions.write(output = packageinputfile, content = content)
+    package_args = ctx.actions.args()
+    package_args.set_param_file_format("multiline")
+    package_args.add_all(inputs_to_package, map_each = _input_file_to_arg)
+    ctx.actions.write(output = packageinputfile, content = package_args)
     other_inputs.append(packageinputfile)
 
     args = ctx.actions.args()
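The mechanism behind the fix: an `Args` object is lazy, and a `map_each` callback passed to `add_all` runs only when the consuming action executes, so the depset is never flattened during analysis. A minimal sketch of the pattern, using a hypothetical `_format_input` callback in place of `_input_file_to_arg` (whose body is not shown in this diff) and an assumed `srcs` attribute:

def _format_input(f):
    # Called once per depset element at execution time; map_each callbacks
    # must be top-level functions so Bazel can fingerprint them.
    return f.path

def _impl(ctx):
    manifest = ctx.actions.declare_file(ctx.attr.name + "_inputs.txt")
    args = ctx.actions.args()
    args.set_param_file_format("multiline")  # one entry per line, unquoted
    args.add_all(depset(ctx.files.srcs), map_each = _format_input)

    # ctx.actions.write accepts an Args object as content; the text is
    # rendered when the write action runs, not while the target is analyzed.
    ctx.actions.write(output = manifest, content = args)
    return [DefaultInfo(files = depset([manifest]))]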
76 changes: 76 additions & 0 deletions tests/py_wheel_performance/BUILD.bazel
@@ -0,0 +1,76 @@
"""Performance test for py_wheel analysis-time scaling.

Verifies that py_wheel analysis time scales linearly with dep count,
not quadratically (as it did when inputs_to_package.to_list() was
expanded and concatenated into a string during analysis).
"""

load("@rules_shell//shell:sh_test.bzl", "sh_test")
load("//python:packaging.bzl", "py_wheel")
load("//python:py_test.bzl", "py_test")
load(":gen_py_libs.bzl", "gen_py_libs")

package(default_visibility = ["//visibility:private"])

# Two py_wheel targets at different sizes to measure scaling behavior.
# If analysis is linear, 10k should take ~2x as long as 5k.
# If analysis is quadratic (the old to_list() bug), 10k takes ~4x as long.
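# Concretely: if analysis cost were quadratic, T(n) = c * n^2, then
# T(10000) / T(5000) = (10000 / 5000)^2 = 4; if linear, T(n) = c * n,
# the ratio is simply 10000 / 5000 = 2. The test's 3.0 threshold sits
# between the two.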

SMALL_DEPS = gen_py_libs(
    name = "small",
    count = 5000,
)

LARGE_DEPS = gen_py_libs(
    name = "large",
    count = 10000,
)

py_wheel(
    name = "small_wheel",
    distribution = "small_wheel",
    python_tag = "py3",
    version = "0.0.1",
    deps = SMALL_DEPS,
)

py_wheel(
    name = "large_wheel",
    distribution = "large_wheel",
    python_tag = "py3",
    version = "0.0.1",
    deps = LARGE_DEPS,
)

# Smaller wheel (100 deps) for correctness verification.
VERIFY_DEPS = gen_py_libs(
    name = "verify",
    count = 100,
)

py_wheel(
    name = "verify_wheel",
    distribution = "verify_wheel",
    python_tag = "py3",
    version = "0.0.1",
    deps = VERIFY_DEPS,
)

py_test(
    name = "py_wheel_contents_test",
    srcs = ["py_wheel_contents_test.py"],
    data = [":verify_wheel"],
    deps = ["//python/runfiles"],
)

sh_test(
    name = "py_wheel_analysis_scaling_test",
    srcs = ["py_wheel_analysis_scaling_test.sh"],
    tags = [
        "exclusive",
        "integration-test",
        "manual",
        "no-remote-exec",
        "no-sandbox",
    ],
)
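The `manual` tag keeps the scaling test out of wildcard invocations like `bazel test //...`, so it has to be requested by label:

bazel test //tests/py_wheel_performance:py_wheel_analysis_scaling_test

`exclusive` prevents other tests from running concurrently, and `no-sandbox` presumably lets the script invoke Bazel against the real workspace; both reduce timing noise.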
66 changes: 66 additions & 0 deletions tests/py_wheel_performance/gen_py_libs.bzl
@@ -0,0 +1,66 @@
"""Macro to generate many py_library targets for benchmarking py_wheel."""

load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("//python:py_library.bzl", "py_library")

def gen_py_libs(name, count):
    """Generate `count` py_library targets, each with a single .py file.

    Uses deeply nested paths to simulate real-world package structures.
    Longer paths amplify the cost of O(n^2) string concatenation in the
    analysis phase, making quadratic scaling easier to detect.

    Args:
        name: Base name prefix for generated targets.
        count: Number of py_library targets to generate.

    Returns:
        A list of label strings for use as py_wheel deps.
    """

    # Deep path prefix to make each _input_file_to_arg line long, simulating
    # real-world monorepo package paths. Longer per-line strings make the
    # quadratic string-concat cost dominate over linear target loading,
    # so the scaling ratio reliably distinguishes O(n) from O(n^2).
    deep_prefix = "/".join([
        "pkg_{}".format(name),
        "src",
        "python",
        "company_name_placeholder",
        "organization_unit_division",
        "engineering_team_name",
        "project_name_repository",
        "subproject_component_area",
        "internal_implementation_detail",
        "generated_sources_directory",
        "modules_directory_location",
        "feature_area_subdivision",
        "subsystem_layer_component",
        "detail_level_implementation",
        "version_specific_code_path",
        "platform_dependent_modules",
    ])

    labels = []
    for i in range(count):
        src_name = "{}_src_{}".format(name, i)
        lib_name = "{}_lib_{}".format(name, i)

        write_file(
            name = src_name,
            out = "{}/module_{}.py".format(deep_prefix, i),
            content = [
                "# Generated module {} of {}".format(i, count),
                "VALUE = {}".format(i),
                "",
            ],
        )

        py_library(
            name = lib_name,
            srcs = [src_name],
        )

        labels.append(":{}".format(lib_name))

    return labels
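To make the expansion concrete, `gen_py_libs(name = "x", count = 2)` produces roughly the following targets (the 16-component deep prefix is abbreviated here):

write_file(
    name = "x_src_0",
    out = "pkg_x/src/python/.../module_0.py",  # deep prefix abbreviated
    content = ["# Generated module 0 of 2", "VALUE = 0", ""],
)

py_library(
    name = "x_lib_0",
    srcs = ["x_src_0"],
)

# ...and likewise for index 1; the call returns [":x_lib_0", ":x_lib_1"],
# which the BUILD file above passes straight to py_wheel's deps.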
118 changes: 118 additions & 0 deletions tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh
@@ -0,0 +1,118 @@
#!/usr/bin/env bash
# Test that py_wheel analysis time scales linearly with dep count.
#
# The old implementation called inputs_to_package.to_list() during analysis
# and built a string via concatenation, giving O(n^2) scaling. The fix uses
# Args.add_all(map_each=...) which defers to execution time, giving O(n).
#
# This test builds two py_wheel targets (5k and 10k deps) in analysis-only
# mode and checks that the ratio of analysis times is closer to 2x (linear)
# than 4x (quadratic).
#
# Uses --nokeep_state_after_build to discard the analysis cache after each
# build, forcing a full re-analysis on the next invocation while keeping
# the Bazel server warm (avoiding startup time noise).

set -euo pipefail

SMALL_TARGET="//tests/py_wheel_performance:small_wheel"
LARGE_TARGET="//tests/py_wheel_performance:large_wheel"
# Threshold ratio: linear=2.0, quadratic=4.0. We use 3.0 as the boundary.
MAX_RATIO="3.0"
ITERATIONS=3

# Invalidate the analysis cache so the next build must re-analyze.
invalidate_analysis_cache() {
    # Redirect stdout as well as stderr: callers capture stdout, so any
    # Bazel output here would corrupt the captured timing value.
    bazel build --nobuild --nokeep_state_after_build "$@" >/dev/null 2>&1
}

# Extract the "interleaved loading-and-analysis" phase time (in ms) from
# a Bazel profile, falling back to wall-clock time if parsing fails.
extract_analysis_ms() {
    local profile="$1"
    local ms
    # Guard with `|| true` so a parse failure under `set -euo pipefail`
    # falls through to the 0 fallback instead of aborting the script.
    ms=$(bazel analyze-profile "${profile}" 2>&1 \
        | grep "loading-and-analysis" \
        | grep -oP '[\d.]+(?= s)' \
        | head -1 \
        | awk '{printf "%d", $1 * 1000}' \
        || true)
    echo "${ms:-0}"
}

measure_analysis_time() {
    local target="$1"
    local best_ms=999999999

    for i in $(seq 1 "${ITERATIONS}"); do
        # Discard analysis cache from any prior build.
        invalidate_analysis_cache "${target}"

        # Measure a fresh analysis pass. Redirect stdout too, since this
        # function's stdout is captured by the caller and must contain
        # only the final numeric result.
        local profile
        profile=$(mktemp /tmp/py_wheel_perf_XXXXXX.profile)
        bazel build --nobuild --profile="${profile}" "${target}" >/dev/null 2>&1

        local analysis_ms
        analysis_ms=$(extract_analysis_ms "${profile}")
        rm -f "${profile}"

        # Fall back to wall time if profile parsing returned 0.
        if [[ "${analysis_ms}" == "0" ]]; then
            invalidate_analysis_cache "${target}"
            local start end
            start=$(date +%s%N)
            bazel build --nobuild "${target}" >/dev/null 2>&1
            end=$(date +%s%N)
            analysis_ms=$(( (end - start) / 1000000 ))
        fi

        echo " iteration ${i}: ${analysis_ms} ms" >&2

        if (( analysis_ms < best_ms )); then
            best_ms=${analysis_ms}
        fi
    done

    echo "${best_ms}"
}

echo "=== py_wheel analysis scaling test ==="
echo ""

# Warm up: ensure Bazel server is running and external deps are fetched.
echo "Warming up..."
bazel build --nobuild "${SMALL_TARGET}" 2>/dev/null || true
bazel build --nobuild "${LARGE_TARGET}" 2>/dev/null || true
echo ""

echo "Measuring small wheel (5k deps), best of ${ITERATIONS}..."
small_ms=$(measure_analysis_time "${SMALL_TARGET}")
echo " Result: ${small_ms} ms"

echo "Measuring large wheel (10k deps), best of ${ITERATIONS}..."
large_ms=$(measure_analysis_time "${LARGE_TARGET}")
echo " Result: ${large_ms} ms"

# Compute ratio using awk for floating point
ratio=$(awk "BEGIN { printf \"%.2f\", ${large_ms} / ${small_ms} }")

echo ""
echo "=== Results ==="
echo " Small (5k deps): ${small_ms} ms"
echo " Large (10k deps): ${large_ms} ms"
echo " Ratio (10k/5k): ${ratio}x"
echo " Max allowed: ${MAX_RATIO}x"
echo ""

# Check that ratio is below threshold
passed=$(awk "BEGIN { print (${ratio} <= ${MAX_RATIO}) ? 1 : 0 }")

if [[ "${passed}" == "1" ]]; then
echo "PASSED: Scaling ratio ${ratio}x is within linear bound (<= ${MAX_RATIO}x)"
exit 0
else
echo "FAILED: Scaling ratio ${ratio}x exceeds ${MAX_RATIO}x, suggesting quadratic behavior"
echo " Expected linear scaling (~2.0x) from Args.add_all(map_each=...)"
echo " Got ${ratio}x which is closer to quadratic (4.0x)"
exit 1
fi
66 changes: 66 additions & 0 deletions tests/py_wheel_performance/py_wheel_contents_test.py
@@ -0,0 +1,66 @@
"""Test that py_wheel produces correct wheel contents with many deps.

Verifies that the Args.add_all(map_each=...) approach used to write the
input file list produces a wheel with the expected files.
"""

import os
import unittest
import zipfile

from python.runfiles import runfiles

_WHEEL_NAME = "verify_wheel-0.0.1-py3-none-any.whl"
_EXPECTED_MODULE_COUNT = 100


class PyWheelContentsTest(unittest.TestCase):

    def setUp(self):
        self.rf = runfiles.Create()
        # Runfiles keys always use forward slashes, so build the path
        # explicitly rather than with os.path.join (which would produce
        # backslashes on Windows).
        whl_path = self.rf.Rlocation(
            "rules_python/tests/py_wheel_performance/" + _WHEEL_NAME
        )
        self.assertIsNotNone(whl_path, "Could not find wheel via runfiles")
        self.assertTrue(os.path.exists(whl_path), f"Wheel not found: {whl_path}")
        self.whl_path = whl_path

    def test_verify_wheel_has_all_modules(self):
        """Verify the wheel contains exactly the expected number of .py files."""
        with zipfile.ZipFile(self.whl_path) as whl:
            py_files = [n for n in whl.namelist() if n.endswith(".py")]
            self.assertEqual(
                len(py_files),
                _EXPECTED_MODULE_COUNT,
                f"Expected {_EXPECTED_MODULE_COUNT} .py files in wheel, got {len(py_files)}",
            )

    def test_verify_wheel_file_contents(self):
        """Verify the .py files in the wheel have the expected content."""
        with zipfile.ZipFile(self.whl_path) as whl:
            py_files = sorted(n for n in whl.namelist() if n.endswith(".py"))
            self.assertTrue(py_files, "No .py files found in wheel")
            first = whl.read(py_files[0]).decode("utf-8")
            self.assertIn("Generated module", first)
            self.assertIn("VALUE =", first)

    def test_verify_wheel_metadata(self):
        """Verify the wheel has proper metadata files."""
        with zipfile.ZipFile(self.whl_path) as whl:
            names = whl.namelist()
            metadata_files = [
                n for n in names if "METADATA" in n or "WHEEL" in n or "RECORD" in n
            ]
            self.assertGreaterEqual(
                len(metadata_files),
                3,
                f"Expected METADATA, WHEEL, RECORD files; got {metadata_files}",
            )

            metadata_path = [n for n in names if n.endswith("METADATA")][0]
            metadata = whl.read(metadata_path).decode("utf-8")
            self.assertIn("Name: verify_wheel", metadata)
            self.assertIn("Version: 0.0.1", metadata)


if __name__ == "__main__":
    unittest.main()