Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/pr_gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,31 @@ jobs:
- name: Python test
run: pytest -vv -s

windows-x86:
runs-on: windows-latest
strategy:
matrix:
python_version: ["3.12"]
steps:
- name: Checkout
uses: actions/checkout@v6

- name: Use Python ${{matrix.python_version}} (x86)
uses: actions/setup-python@v6
with:
python-version: ${{matrix.python_version}}
architecture: x86

- name: Get dependencies
run: |
python -m pip install --upgrade pip

- name: Python build
run: pip install -e .[test] --verbose

- name: Python test
run: pytest -vv -s

macos-arm64:
runs-on: macos-latest
strategy:
Expand Down
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,46 @@ the free-threaded build and the relevant CPython APIs stabilise.
[pep703]: https://peps.python.org/pep-0703/
[issue5]: https://github.com/microsoft/bocpy/issues/5

### Scaling with cores

The chart below shows BOC runtime throughput as the worker count grows from
1 to 8, plotted as **speedup relative to a single worker**. Numbers come
from [examples/benchmark.py](examples/benchmark.py) — a chain-ring workload
that exercises the scheduler, two-phase locking, sub-interpreter crossings
and the message queue together — run on CPython 3.14 (mean of 3 repeats,
8 s each).

<!-- pypi-skip-start -->
```mermaid
---
config:
xyChart:
width: 700
height: 360
---
xychart-beta
title "bocpy speedup vs. worker count (chain-ring benchmark, CPython 3.14)"
x-axis "Workers" [1, 2, 3, 4, 5, 6, 7, 8]
y-axis "Speedup vs. 1 worker" 0 --> 9
bar [1.00, 1.97, 2.94, 3.90, 4.87, 5.82, 6.75, 7.54]
line [1, 2, 3, 4, 5, 6, 7, 8]
```
<!-- pypi-skip-end -->

The line is the ideal `y = x` reference; the bars are measured speedup. Up
to 8 workers, BOC delivers roughly linear scaling on this microbenchmark
(≈7.5× at 8 workers). Real applications carry serial costs that this
benchmark deliberately strips out — see the docstring at the top of
[examples/benchmark.py](examples/benchmark.py) for the load-bearing
caveats. To reproduce:

```bash
python examples/benchmark.py \
--sweep-axis workers --sweep-values 1,2,3,4,5,6,7,8 \
--duration 8 --warmup 2 --repeats 3 \
--output scaling.json
```

A behavior can be thought of as a function which depends on zero or more
concurrently-owned data objects (which we call **cowns**). As a programmer, you
indicate that you want the function to be called once all of those resources are
Expand Down
45 changes: 37 additions & 8 deletions examples/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

from bocpy import (Cown, Matrix, noticeboard, notice_write, receive, send,
start, wait, when)
from bocpy import _core

# Sentinels for the parent/child JSON protocol. Uppercase so the
# transpiler keeps them as module-level constants in the worker export.
Expand All @@ -42,6 +43,21 @@
SCHEMA_VERSION = 1


def _physical_cpu_count() -> int:
    """Return the number of physical CPU cores, with fallbacks.

    Prefers the runtime's native physical-core probe; when that probe
    reports a non-positive (i.e. unknown) value, degrades to the
    logical core count, and finally to 1 so callers always receive a
    usable positive threshold. Used as the oversubscription limit so
    warnings fire as soon as requested workers start landing on SMT
    siblings instead of waiting for logical cores to run out.

    :return: A positive integer.
    """
    physical = _core.physical_cpu_count()
    if physical <= 0:
        # Probe could not determine the physical count; fall back to
        # the logical count (os.cpu_count() may itself return None).
        physical = os.cpu_count() or 1
    return physical


# ---------------------------------------------------------------------------
# Behavior code (chain workload)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -203,11 +219,22 @@ def derive_sizes(cfg: BenchConfig) -> BenchConfig:
:return: The same config with ``rings`` / ``chains_per_ring`` set.
"""
if cfg.chains_per_ring is None:
# Use a small per-ring chain count (4) so chains never collide
# on adjacent slots as they advance. Independent rings carry
# the load instead.
cfg.chains_per_ring = max(
1, cfg.ring_size // (cfg.group_size * cfg.stride * 2))
1, cfg.ring_size // (cfg.group_size * cfg.stride * 8))
if cfg.rings is None:
cfg.rings = max(cfg.workers * 4 // cfg.chains_per_ring,
cfg.workers * 2)
# Bias toward more *rings* rather than more chains-per-ring:
# chains on the same ring contend for adjacent slots as they
# advance, so per-ring concurrency is bounded well below
# ``chains_per_ring``. Independent rings, by contrast, never
# collide. Provision at least ``workers * 4`` rings so every
# worker sees a deep, independent supply of ready chains and
# the measured throughput reflects scheduler scaling rather
# than workload starvation.
cfg.rings = max(cfg.workers * 16 // cfg.chains_per_ring,
cfg.workers * 4)
return cfg


Expand Down Expand Up @@ -244,8 +271,9 @@ def emit_soft_warnings(cfg: BenchConfig, cpu_count: int) -> None:
print(f"warning: duration={cfg.duration}s is short; results will "
"be noisy", file=sys.stderr)
if cfg.workers > cpu_count:
print(f"warning: workers={cfg.workers} exceeds cpu_count="
f"{cpu_count}; oversubscribed", file=sys.stderr)
print(f"warning: workers={cfg.workers} exceeds physical core "
f"count={cpu_count}; oversubscribed (SMT siblings or "
f"hyperthreads will be used)", file=sys.stderr)


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -862,6 +890,7 @@ def collect_metadata(argv: list, git_sha: Optional[str]) -> dict:
"hostname": socket.gethostname(),
"platform": sys.platform,
"cpu_count": os.cpu_count() or 0,
"physical_cpu_count": _physical_cpu_count(),
"python_version": sys.version.split()[0],
"python_implementation": sys.implementation.name,
"free_threaded": free_threaded,
Expand Down Expand Up @@ -1054,7 +1083,7 @@ def _default_sweep_values(axis: str) -> list:
:param axis: The sweep axis name.
:return: A list of default values.
"""
cpu = os.cpu_count() or 1
cpu = _physical_cpu_count()
if axis == "workers":
return sorted(set([1, 2, 4, 8, min(16, cpu)]))
if axis == "iters":
Expand Down Expand Up @@ -1158,7 +1187,7 @@ def child_main(args) -> int:
if err is not None:
print(f"benchmark: invalid config: {err}", file=sys.stderr)
return 2
emit_soft_warnings(cfg, os.cpu_count() or 1)
emit_soft_warnings(cfg, _physical_cpu_count())
rep = run_single_point_body(cfg, repeat_index=0)
payload = {
"inputs": asdict(cfg),
Expand Down Expand Up @@ -1197,7 +1226,7 @@ def parent_main(args) -> int:
return 2

# Pre-spawn validation across every sweep point.
cpu = os.cpu_count() or 1
cpu = _physical_cpu_count()
derived_points = []
for value in sweep_values:
cfg = cfg_for_axis(base, args.sweep_axis, value)
Expand Down
49 changes: 45 additions & 4 deletions examples/boids.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,22 +352,28 @@ class Boids(pyglet.window.Window):
"""Pyglet window that renders a boids simulation."""

def __init__(self, width: int, height: int, num_boids: int,
show_overlay: bool = True):
show_overlay: bool = True, scale: float = 1.0):
"""Initialize the window and create boids.

:param width: Window width in pixels.
:param height: Window height in pixels.
:param num_boids: The number of boids to simulate.
:param show_overlay: Whether to render the boid count and
behavior-rate overlay in the bottom-left corner.
:param scale: Multiplier applied to the drawn boid triangle
size. ``1.0`` keeps the original 20x14 pixel triangle.
"""
pyglet.window.Window.__init__(self, width, height, "Boids")
pyglet.gl.glClearColor(1, 1, 1, 1)
self.batch = pyglet.graphics.Batch()
self.elapsed = 0
self.simulation = Simulation(num_boids, width, height)
self.num_behaviors = 0
self.num_frames = 0
self.total_behaviors = 0
self.total_elapsed = 0.0
self.samples = deque()
self.fps_samples = deque()
self.show_overlay = show_overlay

if show_overlay:
Expand All @@ -380,13 +386,24 @@ def __init__(self, width: int, height: int, num_boids: int,
"behavior/s: ",
font_size=24, x=5, y=50,
color=(100, 100, 100, 255))

self.fps_label = pyglet.text.Label(
"fps: ",
font_size=24, x=5, y=95,
color=(100, 100, 100, 255))
else:
self.num_boids_label = None
self.behaviors_label = None
self.fps_label = None

self.triangles: pyglet.shapes.Triangle = []
tip_x = 0.0
base_x = -20.0 * scale
base_y = 7.0 * scale
for _ in range(num_boids):
tri = pyglet.shapes.Triangle(0, 0, -20, +7, -20, -7,
tri = pyglet.shapes.Triangle(tip_x, 0,
base_x, +base_y,
base_x, -base_y,
color=(55, 255, 255, 255),
batch=self.batch)
tri.anchor_position = 0, 0
Expand All @@ -399,6 +416,7 @@ def on_draw(self):
if self.show_overlay:
self.num_boids_label.draw()
self.behaviors_label.draw()
self.fps_label.draw()

def on_close(self):
wait()
Expand All @@ -412,17 +430,26 @@ def update(self, delta_time: float):
self.elapsed += delta_time
self.simulation.step(self.width, self.height)
self.num_behaviors += self.simulation.num_behaviors
self.num_frames += 1
self.total_behaviors += self.simulation.num_behaviors
self.total_elapsed += delta_time

if self.elapsed > 1:
self.samples.append(self.num_behaviors / self.elapsed)
self.fps_samples.append(self.num_frames / self.elapsed)
self.num_behaviors = 0
self.num_frames = 0
self.elapsed = 0
if len(self.samples) > 10:
self.samples.popleft()
if len(self.fps_samples) > 10:
self.fps_samples.popleft()

if len(self.samples) > 3 and self.behaviors_label is not None:
behavior_rate = sum(self.samples) / len(self.samples)
self.behaviors_label.text = f"behavior/s: {behavior_rate:.0f}"
fps = sum(self.fps_samples) / len(self.fps_samples)
self.fps_label.text = f"fps: {fps:.1f}"

positions = self.simulation.positions
velocities = self.simulation.velocities
Expand Down Expand Up @@ -459,6 +486,9 @@ def update(self, delta_time: float):
parser.add_argument("--workers", type=int, default=None,
help="Number of BOC worker sub-interpreters. "
"Defaults to bocpy's default (CPU count - 1).")
parser.add_argument("--scale", type=float, default=1.0,
help="Multiplier applied to the drawn boid "
"triangle size (default: 1.0).")
args = parser.parse_args()

# Validate at the boundary; downstream code (Matrix sizing, hash modulo,
Expand All @@ -473,6 +503,8 @@ def update(self, delta_time: float):
parser.error("--fps must be positive")
if args.workers is not None and args.workers <= 0:
parser.error("--workers must be positive")
if args.scale <= 0:
parser.error("--scale must be positive")

# Start the BOC runtime explicitly so --workers takes effect for every
# mode.
Expand All @@ -486,7 +518,7 @@ def update(self, delta_time: float):
# The overlay (boid count / behavior rate) is suppressed in video
# mode so the rendered output stays clean.
boids = Boids(args.width, args.height, args.boids,
show_overlay=False)
show_overlay=False, scale=args.scale)

# Allow graceful close: override on_close to set a flag and return
# True so pyglet does not destroy the window mid-frame. The loop
Expand Down Expand Up @@ -630,11 +662,20 @@ def _on_close():
print(ff_stderr.decode("utf-8", errors="replace"), end="")
return

# Successful render: report the average behavior rate over the
# entire run, computed from cumulative counters (not the rolling
# 1s window used for the on-screen overlay).
if boids.total_elapsed > 0:
avg_rate = boids.total_behaviors / boids.total_elapsed
print(f"behavior/s (avg over {boids.total_elapsed:.1f}s of "
f"simulated time): {avg_rate:.0f} "
f"({boids.total_behaviors} behaviors)")

print(f"Wrote {args.output} ({frames_written} frames)"
f"{' (interrupted)' if boids.bocpy_video_closing else ''}")
return

boids = Boids(args.width, args.height, args.boids)
boids = Boids(args.width, args.height, args.boids, scale=args.scale)
pyglet.clock.schedule_interval(boids.update, 1 / args.fps)
pyglet.app.run()

Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions examples/sketches.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

def all_known_cheeses() -> list[str]:
    """Load the cheese inventory from disk.

    Reads ``cheese.txt`` stored alongside this module (the file was
    moved out of the former ``assets/`` subdirectory).

    :return: One cheese name per line, with surrounding whitespace
        stripped.
    """
    path = os.path.join(os.path.dirname(__file__), "cheese.txt")
    with open(path) as file:
        return [line.strip() for line in file]

Expand All @@ -26,7 +26,7 @@ def is_available(logger, name: str) -> bool:

def menu() -> list[str]:
    """Load the menu and shuffle it.

    Reads ``menu.txt`` stored alongside this module (the file was
    moved out of the former ``assets/`` subdirectory) and returns the
    entries in a random order.

    :return: The stripped menu lines, shuffled in place.
    """
    path = os.path.join(os.path.dirname(__file__), "menu.txt")
    with open(path) as file:
        menu = [line.strip() for line in file]
    random.shuffle(menu)
    return menu
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ packages = ["bocpy", "bocpy.examples"]
"bocpy.examples" = "examples"

[tool.setuptools.package-data]
"bocpy.examples" = ["assets/*.txt"]
"bocpy.examples" = ["*.txt"]
Loading
Loading