2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -72,6 +72,8 @@ END_UNRELEASED_TEMPLATE
   {obj}`--stamp` flag.
 * (pypi) Now the RECORD file patches will follow the quoted or unquoted filenames convention
   in order to make `pytorch` and friends easier to patch.
+* (wheel) `py_wheel` no longer expands the input depset during analysis,
+  improving analysis performance for targets with large dependency trees.
 
 {#v0-0-0-fixed}
 ### Fixed
11 changes: 6 additions & 5 deletions python/private/py_wheel.bzl
@@ -344,12 +344,13 @@ def _py_wheel_impl(ctx):
     # Currently this is only the description file (if used).
     other_inputs = []
 
-    # Wrap the inputs into a file to reduce command line length.
+    # Wrap the inputs into a file to reduce command line length, deferring
+    # depset expansion to execution time via Args.add_all with map_each.
     packageinputfile = ctx.actions.declare_file(ctx.attr.name + "_target_wrapped_inputs.txt")
-    content = ""
-    for input_file in inputs_to_package.to_list():
-        content += _input_file_to_arg(input_file) + "\n"
-    ctx.actions.write(output = packageinputfile, content = content)
+    package_args = ctx.actions.args()
+    package_args.set_param_file_format("multiline")
+    package_args.add_all(inputs_to_package, map_each = _input_file_to_arg)
+    ctx.actions.write(output = packageinputfile, content = package_args)
     other_inputs.append(packageinputfile)
 
     args = ctx.actions.args()
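The mechanism behind the fix: an `Args` object is lazy, and a `map_each` callback passed to `add_all` runs only when the consuming action executes, so the depset is never flattened during analysis. A minimal sketch of the pattern, using a hypothetical `_format_input` callback in place of `_input_file_to_arg` (whose body is not shown in this diff) and an assumed `srcs` attribute:

def _format_input(f):
    # Called once per depset element at execution time; map_each callbacks
    # must be top-level functions so Bazel can fingerprint them.
    return f.path

def _impl(ctx):
    manifest = ctx.actions.declare_file(ctx.attr.name + "_inputs.txt")
    args = ctx.actions.args()
    args.set_param_file_format("multiline")  # one entry per line, unquoted
    args.add_all(depset(ctx.files.srcs), map_each = _format_input)

    # ctx.actions.write accepts an Args object as content; the text is
    # rendered when the write action runs, not while the target is analyzed.
    ctx.actions.write(output = manifest, content = args)
    return [DefaultInfo(files = depset([manifest]))]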
76 changes: 76 additions & 0 deletions tests/py_wheel_performance/BUILD.bazel
@@ -0,0 +1,76 @@
"""Performance test for py_wheel analysis-time scaling.

Verifies that py_wheel analysis time scales linearly with dep count,
not quadratically (as it did when inputs_to_package.to_list() was
expanded and concatenated into a string during analysis).
"""

load("@rules_shell//shell:sh_test.bzl", "sh_test")
load("//python:packaging.bzl", "py_wheel")
load("//python:py_test.bzl", "py_test")
load(":gen_py_libs.bzl", "gen_py_libs")

package(default_visibility = ["//visibility:private"])

# Two py_wheel targets at different sizes to measure scaling behavior.
# If analysis is linear, 10k should take ~2x as long as 5k.
# If analysis is quadratic (the old to_list() bug), 10k takes ~4x as long.
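# Concretely: if analysis cost were quadratic, T(n) = c * n^2, then
# T(10000) / T(5000) = (10000 / 5000)^2 = 4; if linear, T(n) = c * n,
# the ratio is simply 10000 / 5000 = 2. The test's 3.0 threshold sits
# between the two.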

SMALL_DEPS = gen_py_libs(
    name = "small",
    count = 5000,
)

LARGE_DEPS = gen_py_libs(
    name = "large",
    count = 10000,
)

py_wheel(
    name = "small_wheel",
    distribution = "small_wheel",
    python_tag = "py3",
    version = "0.0.1",
    deps = SMALL_DEPS,
)

py_wheel(
    name = "large_wheel",
    distribution = "large_wheel",
    python_tag = "py3",
    version = "0.0.1",
    deps = LARGE_DEPS,
)

# Smaller wheel (100 deps) for correctness verification.
VERIFY_DEPS = gen_py_libs(
    name = "verify",
    count = 100,
)

py_wheel(
    name = "verify_wheel",
    distribution = "verify_wheel",
    python_tag = "py3",
    version = "0.0.1",
    deps = VERIFY_DEPS,
)

py_test(
    name = "py_wheel_contents_test",
    srcs = ["py_wheel_contents_test.py"],
    data = [":verify_wheel"],
    deps = ["//python/runfiles"],
)

sh_test(
    name = "py_wheel_analysis_scaling_test",
    srcs = ["py_wheel_analysis_scaling_test.sh"],
    tags = [
        "exclusive",
        "integration-test",
        "manual",
        "no-remote-exec",
        "no-sandbox",
    ],
)
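The `manual` tag keeps the scaling test out of wildcard invocations like `bazel test //...`, so it has to be requested by label:

bazel test //tests/py_wheel_performance:py_wheel_analysis_scaling_test

`exclusive` prevents other tests from running concurrently, and `no-sandbox` presumably lets the script invoke Bazel against the real workspace; both reduce timing noise.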
66 changes: 66 additions & 0 deletions tests/py_wheel_performance/gen_py_libs.bzl
@@ -0,0 +1,66 @@
"""Macro to generate many py_library targets for benchmarking py_wheel."""

load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("//python:py_library.bzl", "py_library")

def gen_py_libs(name, count):
    """Generate `count` py_library targets, each with a single .py file.

    Uses deeply nested paths to simulate real-world package structures.
    Longer paths amplify the cost of O(n^2) string concatenation in the
    analysis phase, making quadratic scaling easier to detect.

    Args:
        name: Base name prefix for generated targets.
        count: Number of py_library targets to generate.

    Returns:
        A list of label strings for use as py_wheel deps.
    """

    # Deep path prefix to make each _input_file_to_arg line long, simulating
    # real-world monorepo package paths. Longer per-line strings make the
    # quadratic string-concat cost dominate over linear target loading,
    # so the scaling ratio reliably distinguishes O(n) from O(n^2).
    deep_prefix = "/".join([
        "pkg_{}".format(name),
        "src",
        "python",
        "company_name_placeholder",
        "organization_unit_division",
        "engineering_team_name",
        "project_name_repository",
        "subproject_component_area",
        "internal_implementation_detail",
        "generated_sources_directory",
        "modules_directory_location",
        "feature_area_subdivision",
        "subsystem_layer_component",
        "detail_level_implementation",
        "version_specific_code_path",
        "platform_dependent_modules",
    ])

    labels = []
    for i in range(count):
        src_name = "{}_src_{}".format(name, i)
        lib_name = "{}_lib_{}".format(name, i)

        write_file(
            name = src_name,
            out = "{}/module_{}.py".format(deep_prefix, i),
            content = [
                "# Generated module {} of {}".format(i, count),
                "VALUE = {}".format(i),
                "",
            ],
        )

        py_library(
            name = lib_name,
            srcs = [src_name],
        )

        labels.append(":{}".format(lib_name))

    return labels
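To make the expansion concrete, `gen_py_libs(name = "x", count = 2)` produces roughly the following targets (the 16-component deep prefix is abbreviated here):

write_file(
    name = "x_src_0",
    out = "pkg_x/src/python/.../module_0.py",  # deep prefix abbreviated
    content = ["# Generated module 0 of 2", "VALUE = 0", ""],
)

py_library(
    name = "x_lib_0",
    srcs = ["x_src_0"],
)

# ...and likewise for index 1; the call returns [":x_lib_0", ":x_lib_1"],
# which the BUILD file above passes straight to py_wheel's deps.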
118 changes: 118 additions & 0 deletions tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh
@@ -0,0 +1,118 @@
#!/usr/bin/env bash
# Test that py_wheel analysis time scales linearly with dep count.
#
# The old implementation called inputs_to_package.to_list() during analysis
# and built a string via concatenation, giving O(n^2) scaling. The fix uses
# Args.add_all(map_each=...) which defers to execution time, giving O(n).
#
# This test builds two py_wheel targets (5k and 10k deps) in analysis-only
# mode and checks that the ratio of analysis times is closer to 2x (linear)
# than 4x (quadratic).
#
# Uses --nokeep_state_after_build to discard the analysis cache after each
# build, forcing a full re-analysis on the next invocation while keeping
# the Bazel server warm (avoiding startup time noise).

set -euo pipefail

SMALL_TARGET="//tests/py_wheel_performance:small_wheel"
LARGE_TARGET="//tests/py_wheel_performance:large_wheel"
# Threshold ratio: linear=2.0, quadratic=4.0. We use 3.0 as the boundary.
MAX_RATIO="3.0"
ITERATIONS=3

# Invalidate the analysis cache so the next build must re-analyze.
invalidate_analysis_cache() {
    # Redirect stdout as well as stderr: callers capture stdout, so any
    # Bazel output here would corrupt the captured timing value.
    bazel build --nobuild --nokeep_state_after_build "$@" >/dev/null 2>&1
}

# Extract the "interleaved loading-and-analysis" phase time (in ms) from
# a Bazel profile, falling back to wall-clock time if parsing fails.
extract_analysis_ms() {
    local profile="$1"
    local ms
    # Guard with `|| true` so a parse failure under `set -euo pipefail`
    # falls through to the 0 fallback instead of aborting the script.
    ms=$(bazel analyze-profile "${profile}" 2>&1 \
        | grep "loading-and-analysis" \
        | grep -oP '[\d.]+(?= s)' \
        | head -1 \
        | awk '{printf "%d", $1 * 1000}' \
        || true)
    echo "${ms:-0}"
}

measure_analysis_time() {
    local target="$1"
    local best_ms=999999999

    for i in $(seq 1 "${ITERATIONS}"); do
        # Discard analysis cache from any prior build.
        invalidate_analysis_cache "${target}"

        # Measure a fresh analysis pass. Redirect stdout too, since this
        # function's stdout is captured by the caller and must contain
        # only the final numeric result.
        local profile
        profile=$(mktemp /tmp/py_wheel_perf_XXXXXX.profile)
        bazel build --nobuild --profile="${profile}" "${target}" >/dev/null 2>&1

        local analysis_ms
        analysis_ms=$(extract_analysis_ms "${profile}")
        rm -f "${profile}"

        # Fall back to wall time if profile parsing returned 0.
        if [[ "${analysis_ms}" == "0" ]]; then
            invalidate_analysis_cache "${target}"
            local start end
            start=$(date +%s%N)
            bazel build --nobuild "${target}" >/dev/null 2>&1
            end=$(date +%s%N)
            analysis_ms=$(( (end - start) / 1000000 ))
        fi

        echo " iteration ${i}: ${analysis_ms} ms" >&2

        if (( analysis_ms < best_ms )); then
            best_ms=${analysis_ms}
        fi
    done

    echo "${best_ms}"
}

echo "=== py_wheel analysis scaling test ==="
echo ""

# Warm up: ensure Bazel server is running and external deps are fetched.
echo "Warming up..."
bazel build --nobuild "${SMALL_TARGET}" 2>/dev/null || true
bazel build --nobuild "${LARGE_TARGET}" 2>/dev/null || true
echo ""

echo "Measuring small wheel (5k deps), best of ${ITERATIONS}..."
small_ms=$(measure_analysis_time "${SMALL_TARGET}")
echo " Result: ${small_ms} ms"

echo "Measuring large wheel (10k deps), best of ${ITERATIONS}..."
large_ms=$(measure_analysis_time "${LARGE_TARGET}")
echo " Result: ${large_ms} ms"

# Compute ratio using awk for floating point
ratio=$(awk "BEGIN { printf \"%.2f\", ${large_ms} / ${small_ms} }")

echo ""
echo "=== Results ==="
echo " Small (5k deps): ${small_ms} ms"
echo " Large (10k deps): ${large_ms} ms"
echo " Ratio (10k/5k): ${ratio}x"
echo " Max allowed: ${MAX_RATIO}x"
echo ""

# Check that ratio is below threshold
passed=$(awk "BEGIN { print (${ratio} <= ${MAX_RATIO}) ? 1 : 0 }")

if [[ "${passed}" == "1" ]]; then
echo "PASSED: Scaling ratio ${ratio}x is within linear bound (<= ${MAX_RATIO}x)"
exit 0
else
echo "FAILED: Scaling ratio ${ratio}x exceeds ${MAX_RATIO}x, suggesting quadratic behavior"
echo " Expected linear scaling (~2.0x) from Args.add_all(map_each=...)"
echo " Got ${ratio}x which is closer to quadratic (4.0x)"
exit 1
fi
66 changes: 66 additions & 0 deletions tests/py_wheel_performance/py_wheel_contents_test.py
@@ -0,0 +1,66 @@
"""Test that py_wheel produces correct wheel contents with many deps.

Verifies that the Args.add_all(map_each=...) approach used to write the
input file list produces a wheel with the expected files.
"""

import os
import unittest
import zipfile

from python.runfiles import runfiles

_WHEEL_NAME = "verify_wheel-0.0.1-py3-none-any.whl"
_EXPECTED_MODULE_COUNT = 100


class PyWheelContentsTest(unittest.TestCase):

    def setUp(self):
        self.rf = runfiles.Create()
        # Runfiles keys always use forward slashes, so build the path
        # explicitly rather than with os.path.join (which would produce
        # backslashes on Windows).
        whl_path = self.rf.Rlocation(
            "rules_python/tests/py_wheel_performance/" + _WHEEL_NAME
        )
        self.assertIsNotNone(whl_path, "Could not find wheel via runfiles")
        self.assertTrue(os.path.exists(whl_path), f"Wheel not found: {whl_path}")
        self.whl_path = whl_path

    def test_verify_wheel_has_all_modules(self):
        """Verify the wheel contains exactly the expected number of .py files."""
        with zipfile.ZipFile(self.whl_path) as whl:
            py_files = [n for n in whl.namelist() if n.endswith(".py")]
            self.assertEqual(
                len(py_files),
                _EXPECTED_MODULE_COUNT,
                f"Expected {_EXPECTED_MODULE_COUNT} .py files in wheel, got {len(py_files)}",
            )

    def test_verify_wheel_file_contents(self):
        """Verify the .py files in the wheel have the expected content."""
        with zipfile.ZipFile(self.whl_path) as whl:
            py_files = sorted(n for n in whl.namelist() if n.endswith(".py"))
            self.assertTrue(py_files, "No .py files found in wheel")
            first = whl.read(py_files[0]).decode("utf-8")
            self.assertIn("Generated module", first)
            self.assertIn("VALUE =", first)

    def test_verify_wheel_metadata(self):
        """Verify the wheel has proper metadata files."""
        with zipfile.ZipFile(self.whl_path) as whl:
            names = whl.namelist()
            metadata_files = [
                n for n in names if "METADATA" in n or "WHEEL" in n or "RECORD" in n
            ]
            self.assertGreaterEqual(
                len(metadata_files),
                3,
                f"Expected METADATA, WHEEL, RECORD files; got {metadata_files}",
            )

            metadata_path = [n for n in names if n.endswith("METADATA")][0]
            metadata = whl.read(metadata_path).decode("utf-8")
            self.assertIn("Name: verify_wheel", metadata)
            self.assertIn("Version: 0.0.1", metadata)


if __name__ == "__main__":
    unittest.main()