diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 436adba805..1465dd1fa8 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -144,8 +144,7 @@ jobs:
             profile: "debug"
             sanitize: ""
             instrument: "none"
-          - &release-build
-            name: "release"
+          - name: "release"
             profile: "release"
             sanitize: "" # TODO: enable cfi and safe-stack when possible
             instrument: "none"
@@ -502,45 +501,71 @@ jobs:
           recipe_args: "${{ matrix.recipe.args }}"
       - *tmate
 
-  features:
+  concurrency:
     if: >-
       ${{
         needs.check_changes.outputs.devfiles == 'true'
         || startsWith(github.event.ref, 'refs/tags/v')
         || github.event_name == 'workflow_dispatch'
       }}
-    name: "features/${{ matrix.build.name }}/${{ matrix.features }}"
+    name: "concurrency"
     runs-on: "lab"
     needs:
       - check_changes
       - check
     permissions: *check-perms
-    env: *check-env
-    strategy:
-      fail-fast: false
-      max-parallel: 1
-      matrix:
-        include:
-          # The `loom` feature flips `concurrency::sync` to loom's
-          # primitives workspace-wide, which breaks crates that rely on
-          # `Weak`, `Arc::downgrade`, etc. (those aren't in
-          # `loom::sync`).  Scope the loom build to only the concurrency
-          # package (which hosts the quiescent tests) so workspace
-          # feature unification doesn't poison unrelated crates.
-          - build: *release-build
-            features: "loom"
-            test_package: "concurrency"
-          - build: *release-build
-            features: "shuttle"
-            test_package: ""
+    # This job doesn't use the `*check-env` anchor: that anchor's
+    # `JUST_VARS` references `matrix.build.*`, and this job has no
+    # matrix. Each step below sets `JUST_VARS` itself, inlining the
+    # docker_sock / debug_justfile / oci_repo settings that the
+    # anchor would have provided.
+    env:
+      USER: "runner"
     steps:
       - *checkout
       - *nix-setup
-      - name: "test/${{ matrix.features }}"
+      - name: "shuttle"
+        env:
+          JUST_VARS: >-
+            docker_sock=/run/docker/docker.sock
+            debug_justfile=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_justfile || false }}
+            profile=release
+            features=shuttle
+            oci_repo=ghcr.io
+        uses: *just
+        with:
+          recipe: "test"
+      - name: "shuttle_pct"
+        env:
+          JUST_VARS: >-
+            docker_sock=/run/docker/docker.sock
+            debug_justfile=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_justfile || false }}
+            profile=release
+            features=shuttle_pct
+            oci_repo=ghcr.io
+        uses: *just
+        with:
+          recipe: "test"
+      # The `loom` feature flips `concurrency::sync` to loom's
+      # primitives workspace-wide, which breaks crates that rely on
+      # `Weak`, `Arc::downgrade`, etc. (those aren't in
+      # `loom::sync`).  Scope the loom build to only the concurrency
+      # package (which hosts the core concurrency tests) so workspace
+      # feature unification doesn't poison unrelated crates.
+      #
+      # TODO: gate tests which can't be used with loom
+      - name: "loom"
+        env:
+          JUST_VARS: >-
+            docker_sock=/run/docker/docker.sock
+            debug_justfile=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_justfile || false }}
+            profile=release
+            features=loom
+            oci_repo=ghcr.io
         uses: *just
         with:
           recipe: "test"
-          recipe_args: "${{ matrix.test_package }}"
+          recipe_args: "concurrency"
       - *tmate
 
   vlab:
@@ -633,7 +658,7 @@ jobs:
     needs:
       - check
       - sanitize
-      - features
+      - concurrency
       - build
       - vlab
       - test_each
@@ -653,10 +678,10 @@ jobs:
         run: |
           echo '::error:: Some check job(s) failed'
           exit 1
-      - name: "Flag any features matrix failures"
-        if: ${{ needs.features.result != 'success' && needs.features.result != 'skipped' }}
+      - name: "Flag any concurrency job failures"
+        if: ${{ needs.concurrency.result != 'success' && needs.concurrency.result != 'skipped' }}
         run: |
-          echo '::error:: Some features job(s) failed'
+          echo '::error:: concurrency job failed'
           exit 1
       - name: "Flag any test_each matrix failures"
         if: ${{ needs.test_each.result != 'success' && needs.test_each.result != 'skipped' }}
diff --git a/Cargo.lock b/Cargo.lock
index 0cb905debb..515ab160b3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -513,7 +513,7 @@ version = "0.13.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9a21a3b022507b9edd2050caf370d945e398c1a7c8455531220fa3968c45d29e"
 dependencies = [
- "proc-macro-crate",
+ "proc-macro-crate 2.0.0",
  "proc-macro2",
  "quote",
  "syn 2.0.117",
@@ -1252,6 +1252,7 @@ dependencies = [
 name = "dataplane-concurrency-macros"
 version = "0.21.0"
 dependencies = [
+ "proc-macro-crate 3.5.0",
  "proc-macro2",
  "quote",
  "syn 2.0.117",
@@ -1285,10 +1286,14 @@ dependencies = [
 name = "dataplane-dpdk"
 version = "0.21.0"
 dependencies = [
+ "bolero",
+ "dataplane-concurrency",
  "dataplane-dpdk-sys",
  "dataplane-dpdk-sysroot-helper",
  "dataplane-errno",
+ "dataplane-id",
  "dataplane-net",
+ "nix 0.31.3",
  "serde",
  "thiserror",
  "tracing",
@@ -4319,6 +4324,15 @@ dependencies = [
  "toml_edit 0.20.7",
 ]
 
+[[package]]
+name = "proc-macro-crate"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f"
+dependencies = [
+ "toml_edit 0.25.11+spec-1.1.0",
+]
+
 [[package]]
 name = "proc-macro-error-attr2"
 version = "2.0.0"
@@ -5729,7 +5743,7 @@ checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
 dependencies = [
  "serde",
  "serde_spanned",
- "toml_datetime",
+ "toml_datetime 0.6.11",
  "toml_edit 0.22.27",
 ]
 
@@ -5742,6 +5756,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "1.1.1+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7"
+dependencies = [
+ "serde_core",
+]
+
 [[package]]
 name = "toml_edit"
 version = "0.20.7"
@@ -5749,7 +5772,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81"
 dependencies = [
  "indexmap 2.14.0",
- "toml_datetime",
+ "toml_datetime 0.6.11",
  "winnow 0.5.40",
 ]
 
@@ -5762,11 +5785,32 @@ dependencies = [
  "indexmap 2.14.0",
  "serde",
  "serde_spanned",
- "toml_datetime",
+ "toml_datetime 0.6.11",
  "toml_write",
  "winnow 0.7.15",
 ]
 
+[[package]]
+name = "toml_edit"
+version = "0.25.11+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b"
+dependencies = [
+ "indexmap 2.14.0",
+ "toml_datetime 1.1.1+spec-1.1.0",
+ "toml_parser",
+ "winnow 1.0.3",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.1.2+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
+dependencies = [
+ "winnow 1.0.3",
+]
+
 [[package]]
 name = "toml_write"
 version = "0.1.2"
@@ -6385,6 +6429,15 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "winnow"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "wit-bindgen"
 version = "0.51.0"
diff --git a/concurrency-macros/Cargo.toml b/concurrency-macros/Cargo.toml
index cbd7b35dd1..4a415d3b01 100644
--- a/concurrency-macros/Cargo.toml
+++ b/concurrency-macros/Cargo.toml
@@ -14,6 +14,12 @@ shuttle = []
 silence_clippy = []
 
 [dependencies]
+# `proc-macro-crate` resolves the consumer's actual import name for
+# `dataplane-concurrency`. This crate is publishable, so its macros
+# cannot assume a fixed `::concurrency` alias -- workspace consumers
+# often rename it, external users typically don't. See
+# `concurrency_crate_path` for how the resolution feeds into emitted paths.
+proc-macro-crate = { workspace = true, default-features = true }
 proc-macro2 = { workspace = true, default-features = true }
 quote = { workspace = true, default-features = true }
 syn = { workspace = true, default-features = true, features = ["full"] }
diff --git a/concurrency-macros/src/lib.rs b/concurrency-macros/src/lib.rs
index 0239477e10..8d2ee227eb 100644
--- a/concurrency-macros/src/lib.rs
+++ b/concurrency-macros/src/lib.rs
@@ -2,13 +2,45 @@
 // Copyright Open Network Fabric Authors
 
 use proc_macro::TokenStream;
+use proc_macro_crate::{FoundCrate, crate_name};
+use proc_macro2::{Span, TokenStream as TokenStream2};
 use quote::quote;
 use syn::{
-    Ident, Item,
+    Ident, Item, ItemFn,
     parse::{Parse, ParseStream},
     parse_macro_input,
 };
 
+/// Resolve a path prefix for `dataplane-concurrency` in the consumer's
+/// `Cargo.toml`. Returns a token stream that resolves to the crate root,
+/// so callers can append `::stress` or `::with_loom` etc.
+///
+/// * Workspace consumer with `concurrency = { package = "dataplane-concurrency", ... }`
+///   in its `Cargo.toml`: returns `::concurrency`.
+/// * External consumer with `dataplane-concurrency = "..."` directly:
+///   returns `::dataplane_concurrency`.
+/// * `dataplane-concurrency`'s own integration tests: returns
+///   `::dataplane_concurrency` (which requires the test file to do
+///   `extern crate dataplane_concurrency;` -- cargo doesn't let a crate
+///   list itself as a regular dev-dep, but `extern crate` works in the
+///   integration test).
+fn concurrency_crate_path() -> TokenStream2 {
+    // Crate name emitted whenever the lookup cannot supply an import
+    // name of its own.
+    const CANONICAL: &str = "dataplane_concurrency";
+
+    let import_name = match crate_name("dataplane-concurrency") {
+        // The lookup produced an explicit name (possibly a rename):
+        // emit exactly that.
+        Ok(FoundCrate::Name(renamed)) => renamed,
+        // The crate resolved to itself, or the lookup failed: fall
+        // back to the canonical crate name.
+        Ok(FoundCrate::Itself) | Err(_) => CANONICAL.to_owned(),
+    };
+    let root = Ident::new(&import_name, Span::call_site());
+    quote! { ::#root }
+}
+
 struct ConcurrencyModeArgs {
     mode: Ident,
 }
@@ -38,20 +70,21 @@ pub fn concurrency_mode(attr: TokenStream, item: TokenStream) -> TokenStream {
     let item = parse_macro_input!(item as Item);
 
     let mode = args.mode.to_string();
+    let krate = concurrency_crate_path();
 
     let output = match mode.as_str() {
         "shuttle" => quote! {
-            ::concurrency::with_shuttle! {
+            #krate::with_shuttle! {
                 #item
             }
         },
         "loom" => quote! {
-            ::concurrency::with_loom! {
+            #krate::with_loom! {
                 #item
             }
         },
         "std" => quote! {
-            ::concurrency::with_std! {
+            #krate::with_std! {
                 #item
             }
         },
@@ -67,3 +100,143 @@ pub fn concurrency_mode(attr: TokenStream, item: TokenStream) -> TokenStream {
 
     output.into()
 }
+
+/// Mark a function as a test that runs under whichever concurrency backend
+/// is currently selected on `dataplane-concurrency`.
+///
+/// Under the default (production) backend, expands to a flat
+/// `#[test] fn <name>() { concurrency::stress(|| { original }) }`,
+/// which calls the body once.
+///
+/// Under any model-checker backend (`loom`, `shuttle`, `shuttle_pct`,
+/// `shuttle_dfs`), expands to a nested module so the test's binary
+/// path identifies the active backend in nextest reports / JUnit
+/// output:
+///
+/// ```text
+/// // #[concurrency::test] fn some_test() { body }
+/// // under `--features loom`:
+/// mod some_test {
+///     mod concurrency_model {
+///         #[test]
+///         fn loom() { concurrency::stress(|| body) }
+///     }
+/// }
+/// ```
+///
+/// The same shape applies for `shuttle` / `shuttle_pct` / `shuttle_dfs`;
+/// in each case the leaf function is named after the active backend.
+/// Nextest filters like `-E 'test(/concurrency_model::loom$/)'` then
+/// pick out the loom-backed runs cleanly without having to grep on
+/// binary names.
+///
+/// # Example
+///
+/// ```ignore
+/// #[concurrency::test]
+/// fn snapshot_observes_a_legal_value() {
+///     // ... body uses concurrency::sync, concurrency::thread ...
+/// }
+/// ```
+///
+/// The function must take no arguments and return `()`. The body is
+/// captured as a closure, so it must be `Fn() + Send + Sync + 'static`
+/// (no borrows of locals, no `FnOnce`-only constructs). This matches
+/// what `loom::model` and `shuttle::check_*` require.
+///
+/// # Limitations
+///
+/// * **Single-threaded bodies fail under `shuttle_pct`.** Shuttle's PCT
+///   scheduler panics at runtime if the test closure does not exercise
+///   any concurrent atomic / thread operation (no `thread::spawn`, no
+///   contended `Mutex`/`Arc`). The detection is dynamic, so the macro
+///   cannot reject these statically; if you need such a test, gate it
+///   with `#[cfg(not(feature = "shuttle_pct"))]` or use a regular
+///   `#[test]` for the default-only smoke check.
+/// * **Async functions, arguments, and non-`()` return types are
+///   rejected at expansion time** with a clear compile error.
+#[proc_macro_attribute]
+pub fn test(_attr: TokenStream, item: TokenStream) -> TokenStream {
+    let func = parse_macro_input!(item as ItemFn);
+    let ItemFn { attrs, vis, sig, block } = &func;
+    let fn_name = &sig.ident;
+
+    if let Some(asyncness) = sig.asyncness {
+        return syn::Error::new_spanned(
+            asyncness,
+            "#[concurrency::test] does not support async functions yet",
+        )
+        .to_compile_error()
+        .into();
+    }
+    if !sig.inputs.is_empty() {
+        return syn::Error::new_spanned(
+            &sig.inputs,
+            "#[concurrency::test] functions must take no arguments",
+        )
+        .to_compile_error()
+        .into();
+    }
+    // The body is passed to `stress` as a `|| { ... }` closure, so only an
+    // omitted return type or an explicit `-> ()` is accepted.
+    if let syn::ReturnType::Type(_, ty) = &sig.output {
+        let is_unit = matches!(&**ty, syn::Type::Tuple(tuple) if tuple.elems.is_empty());
+        if !is_unit {
+            return syn::Error::new_spanned(ty, "#[concurrency::test] functions must return `()`")
+                .to_compile_error()
+                .into();
+        }
+    }
+
+    let krate = concurrency_crate_path();
+    // Default backend: one flat `#[test] fn <name>()`. Model-checker
+    // backends: `mod <fn_name> { mod concurrency_model { fn <backend>() } }`
+    // so nextest filters like `test(/concurrency_model::loom$/)` work. The
+    // glob imports may go unused by a given body, hence `unused_imports`.
+    quote! {
+        #[cfg(not(any(feature = "loom", feature = "shuttle")))]
+        #[::core::prelude::v1::test]
+        #(#attrs)*
+        #vis #sig {
+            #krate::stress(|| #block);
+        }
+
+        #[cfg(any(feature = "loom", feature = "shuttle"))]
+        #[allow(non_snake_case, unused_imports)]
+        mod #fn_name {
+            use super::*;
+            mod concurrency_model {
+                use super::*;
+
+                #[cfg(feature = "loom")]
+                #[::core::prelude::v1::test]
+                #(#attrs)*
+                fn loom() {
+                    #krate::stress(|| #block);
+                }
+
+                #[cfg(all(feature = "shuttle", not(feature = "shuttle_pct")))]
+                #[::core::prelude::v1::test]
+                #(#attrs)*
+                fn shuttle() {
+                    #krate::stress(|| #block);
+                }
+
+                #[cfg(all(feature = "shuttle_pct", not(feature = "shuttle_dfs")))]
+                #[::core::prelude::v1::test]
+                #(#attrs)*
+                fn shuttle_pct() {
+                    #krate::stress(|| #block);
+                }
+
+                #[cfg(feature = "shuttle_dfs")]
+                #[::core::prelude::v1::test]
+                #(#attrs)*
+                fn shuttle_dfs() {
+                    #krate::stress(|| #block);
+                }
+            }
+        }
+    }
+    .into()
+}
diff --git a/concurrency/Cargo.toml b/concurrency/Cargo.toml
index 0ef625da08..3cd3e072dd 100644
--- a/concurrency/Cargo.toml
+++ b/concurrency/Cargo.toml
@@ -14,15 +14,20 @@ parking_lot = ["dep:parking_lot"]
 #   * `shuttle`     -- shuttle with the random scheduler (the default
 #                      for first-time users -- you almost always want
 #                      this one).
-#   * `shuttle_pct` -- shuttle with the PCT scheduler. Use when you
+#   * `shuttle_pct` -- shuttle with the PCT scheduler.  Use when you
 #                      want to bias toward rare interleavings.
-#   * `shuttle_dfs` -- shuttle with the DFS scheduler. Use for
+#   * `shuttle_dfs` -- shuttle with the DFS scheduler.  Use for
 #                      exhaustive small-state exploration.
 #
-# Arranged as a chain (`shuttle_dfs` -> `shuttle_pct` -> `shuttle`)
-# so a single `feature = "shuttle"` cfg check is true under every
-# variant. The scheduler is selected at runtime by
-# `concurrency::stress` (added in a later PR of this stack).
+# All three share the same `dep:shuttle` machinery; only the scheduler
+# selected at runtime differs.  See `concurrency::stress` for the
+# dispatch and the `#[concurrency::test]` attribute macro for the
+# write-once-run-everywhere wrapper.
+#
+# The features form a chain (`shuttle_dfs` -> `shuttle_pct` -> `shuttle`)
+# so that any `feature = "shuttle"` cfg check is true under all three
+# variants.  cfg_select-style precedence (most-specific first) still
+# picks the right scheduler.  See `concurrency::stress`.
 shuttle = ["dep:shuttle", "concurrency-macros/shuttle"]
 shuttle_pct = ["shuttle"]
 shuttle_dfs = ["shuttle_pct"]
diff --git a/concurrency/QUIESCENT.md b/concurrency/QUIESCENT.md
index 8e7cd7f271..e12898be30 100644
--- a/concurrency/QUIESCENT.md
+++ b/concurrency/QUIESCENT.md
@@ -24,7 +24,7 @@ graph TD
 ## Quick start
 
 ```rust,ignore
-use dataplane_quiescent::channel;
+use dataplane_concurrency::quiescent::channel;
 
 #[derive(Debug)]
 struct MyConfig { /* ... */ }
@@ -142,7 +142,7 @@ profile:
 Construction:
 
 ```rust,ignore
-use dataplane_quiescent::channel;
+use dataplane_concurrency::quiescent::channel;
 
 let publisher = channel(initial_value);
 ```
@@ -354,7 +354,7 @@ the `Publisher`. This makes the destructor-thread-affinity guarantee a
 In practice:
 
 ```rust,ignore
-use dataplane_quiescent::channel;
+use dataplane_concurrency::quiescent::channel;
 
 let publisher = channel(initial);
 
diff --git a/concurrency/src/lib.rs b/concurrency/src/lib.rs
index 10415eb60a..91035d61e0 100644
--- a/concurrency/src/lib.rs
+++ b/concurrency/src/lib.rs
@@ -1,6 +1,64 @@
 // SPDX-License-Identifier: Apache-2.0
 // Copyright Open Network Fabric Authors
 
+//! Backend-routed concurrency primitives for the dataplane workspace.
+//!
+//! Re-exports a uniform `parking_lot`-shaped surface that compiles
+//! unchanged under the production backend, `loom`, and `shuttle*`.
+//! `#[concurrency::test]` + `concurrency::stress` let a single source
+//! file exercise either the production code path or the model-checker
+//! of choice.
+//!
+//! # "Compiles under loom" != "exhaustively checked under loom"
+//!
+//! Several documented shim limitations let code keep compiling
+//! against the facade without being meaningfully model-checked for
+//! the schedules that matter. Authors writing new model-check
+//! coverage should be aware of the gaps:
+//!
+//! * **`Weak<T>` under loom** holds a strong clone of the inner
+//!   `Arc` (loom 0.7 ships no `Weak` of its own), so
+//!   `Weak::upgrade` *always* returns `Some` after a successful
+//!   `Arc::downgrade`. The race a loom test would want to expose --
+//!   "the last `Arc` dropped between my `Weak::upgrade` check and my
+//!   use" -- is unreachable. Code that depends on the
+//!   upgrade-fails-after-last-strong-drop semantics needs a different
+//!   testing strategy (real OS threads + tsan, or a hand-rolled
+//!   model). Concrete workspace consequence: NAT's allocator/port-
+//!   forwarder paths use `Weak::upgrade().is_none()` as the liveness
+//!   signal for cleanup (see `nat/src/stateful/apalloc/alloc.rs` and
+//!   `port_alloc.rs`); under loom that signal never fires, so those
+//!   paths are *not* exercised. NAT is not in the loom test matrix
+//!   today, which is consistent with that limit; do not add it
+//!   without first reworking the Weak usage or extending the shim.
+//! * **`RwLock::upgradable_read` under loom/shuttle** is implemented
+//!   on top of an exclusive `write()`. Sound -- every schedule allowed
+//!   here is one `parking_lot` also allows -- but lossy: the model
+//!   checker never explores the many-readers-plus-one-upgradable
+//!   schedule that `parking_lot` permits. Tests that hinge on that
+//!   interleaving need `RwLock<T>` with explicit `read()` then
+//!   `write()`, or a richer state machine in the facade.
+//! * **`static FOO: Mutex<T> = Mutex::new(...)` does not compile
+//!   under loom.** `loom::sync::Mutex::new` is plain `fn`, not
+//!   `const fn`, so a static initialiser fails to typecheck. Use
+//!   `OnceLock` for the static (the facade re-exports
+//!   `std::sync::OnceLock` under all backends) or move the
+//!   construction into a runtime initialiser gated by
+//!   `#[concurrency_mode(std)]`.
+//! * **`OnceLock` under loom/shuttle** is the real `std::sync::OnceLock`,
+//!   not a model-aware shim. Loom and shuttle do not see the
+//!   atomics inside `OnceLock::get_or_init`, so tests whose
+//!   correctness depends on the *ordering* of a once-initialised
+//!   publication are not covered. `OnceLock` is sound here for the
+//!   "compute lazily once" pattern; the publish-ordering story
+//!   needs a separate `AtomicX` + `Acquire/Release` pair that the
+//!   model checker *can* trace.
+//!
+//! The `_strict_provenance` feature forces the `Mutex<Arc<T>>`
+//! fallback slot even under the default backend; the CI miri matrix
+//! exercises both `ArcSwap` (production) and that fallback to widen
+//! coverage.
+
 #![deny(
     unsafe_code,
     missing_docs,
@@ -13,38 +71,23 @@
 #![allow(missing_docs)]
 
 pub mod macros;
+mod stress;
 pub mod sync;
+pub mod thread;
+
+// `stress` is `pub` so the expansion of `#[concurrency::test]` in
+// downstream crates can name it. It is not part of the recommended
+// public surface; the macro is. `#[doc(hidden)]` keeps the symbol
+// off rustdoc, leaving users to land on `#[concurrency::test]`.
+#[doc(hidden)]
+pub use stress::stress;
 
 #[cfg(all(miri, any(feature = "shuttle", feature = "loom")))]
 compile_error!("miri does not meaningfully support 'loom' or 'shuttle'");
 
-#[cfg(not(any(feature = "loom", feature = "shuttle")))]
-pub use std::thread;
-
-#[cfg(all(
-    feature = "loom",
-    not(feature = "shuttle"),
-    not(feature = "silence_clippy")
-))]
-pub use loom::thread;
-
-#[cfg(all(
-    feature = "shuttle",
-    not(feature = "loom"),
-    not(feature = "silence_clippy")
-))]
-pub use shuttle::thread;
-
 #[cfg(all(feature = "shuttle", feature = "loom", not(feature = "silence_clippy")))]
 compile_error!("Cannot enable both 'loom' and 'shuttle' features at the same time");
 
-//////////////////////
-// This is a workaround to silence clippy warnings when both loom and shuttle
-// features are enabled in the clippy checks which uses --all-features.
-#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))]
-pub use std::thread;
-//////////////////////
-
 #[cfg(all(feature = "silence_clippy", not(feature = "shuttle")))]
 compile_error!("silence_clippy manually enabled, should only be enabled by --all-features");
 
diff --git a/concurrency/src/macros.rs b/concurrency/src/macros.rs
index 6dbe7a5658..63e88ef4f6 100644
--- a/concurrency/src/macros.rs
+++ b/concurrency/src/macros.rs
@@ -160,4 +160,4 @@ macro_rules! with_std {
     ($($item:item)*) => {};
 }
 
-pub use concurrency_macros::concurrency_mode;
+pub use concurrency_macros::{concurrency_mode, test};
diff --git a/concurrency/src/quiescent.rs b/concurrency/src/quiescent.rs
index ef21ca0d56..81ed7514a8 100644
--- a/concurrency/src/quiescent.rs
+++ b/concurrency/src/quiescent.rs
@@ -196,18 +196,17 @@ impl Version {
             // LCOV_EXCL_START - reaching this path is itself the failure;
             // chasing coverage of it is absurd.  See the comment below.
             core::hint::cold_path();
-            #[allow(clippy::panic)]
-            {
-                // This whole path is technically reachable, but only technically.
-                // If you got config updates 1B times per second on average it would
-                // still take 584 years to wrap around.  Even that requires us to receive
-                // and process config updates faster than the line rate of an 800Gb/s NIC.
-                // For hundreds of years.  With no reboot.
-                //
-                // The only realistic way to reach this point is via a bug in this code,
-                // not via normal operation.
-                panic!("Version wrapped!  This is a bug");
-            }
+            // This whole path is technically reachable, but only technically.
+            // If you got config updates 1B times per second on average it would
+            // still take 584 years to wrap around.  Even that requires us to receive
+            // and process config updates faster than the line rate of an 800Gb/s NIC.
+            // For hundreds of years.  With no reboot.
+            //
+            // The only realistic way to reach this point is via a bug in this code,
+            // not via normal operation -- this is exactly what `unreachable!()` is
+            // for per development/programming-rules/error-handling.md. The
+            // formatting variant is non-const, so we use the bare form.
+            unreachable!()
             // LCOV_EXCL_STOP
         }
     }
diff --git a/concurrency/src/slot.rs b/concurrency/src/slot.rs
index 4b42d55868..291fbd846b 100644
--- a/concurrency/src/slot.rs
+++ b/concurrency/src/slot.rs
@@ -15,6 +15,17 @@
 //! protocol -- atomic publish, atomic load -- which is all the model
 //! checker needs to see.
 //!
+//! **Important coverage limit.** Model-checker tests that go through
+//! `Slot` / `SlotOption` exercise the *protocol* of a single-slot
+//! atomic publication: one writer swaps, readers see either old or
+//! new, no torn read.  They do *not* exercise `arc_swap`'s internal
+//! hazard-pointer machinery, which is what the production path
+//! actually runs.  A bug inside `arc_swap` itself (e.g. a missed
+//! retire, an incorrect epoch comparison) cannot surface under loom
+//! or shuttle here.  If you want coverage of `arc_swap`'s internals,
+//! the miri job (which runs against the real `ArcSwap` in
+//! permissive-provenance mode) is where it lives.
+//!
 //! [`Subscriber::snapshot`]: crate::Subscriber::snapshot
 
 // Strict provenance checks fail with arc-swap since it uses hazard pointers and does not (yet) use the new
diff --git a/concurrency/src/stress.rs b/concurrency/src/stress.rs
new file mode 100644
index 0000000000..243b48ec2b
--- /dev/null
+++ b/concurrency/src/stress.rs
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Backend dispatch for model-checking tests.
+//!
+//! [`stress`] runs `body` under whichever concurrency backend the crate
+//! was compiled against:
+//!
+//! * default backend -- direct call, no scheduling exploration
+//! * `loom` feature -- `loom::model`
+//! * `shuttle` feature -- `shuttle::check_random`
+//! * `shuttle_pct` feature -- `shuttle::check_pct`
+//! * `shuttle_dfs` feature -- `shuttle::check_dfs` (capped at `ITERATIONS`)
+//!
+//! `lib.rs` `compile_error!`s if both `loom` and any `shuttle*` are
+//! enabled at once, so only one branch should ever fire in a real
+//! build. Under `--all-features` the `silence_clippy` escape hatch
+//! suppresses that error and the `cfg_select!` below resolves the
+//! arms in this order: `loom > shuttle_dfs > shuttle_pct > shuttle`.
+//! Same precedence the routing in `concurrency::sync` uses.
+//!
+//! Tests written once exercise any of these by toggling features on the
+//! crate. The `#[concurrency::test]` attribute (in `concurrency-macros`)
+//! is a thin wrapper that calls this function for you.
+
+/// Run `body` under the concurrency backend selected at compile time.
+///
+/// See the module docs for the per-backend dispatch table.
+pub fn stress<F>(body: F)
+where
+    F: Fn() + Send + Sync + 'static,
+{
+    // `feature = "shuttle"` is true under every shuttle variant (see the
+    // feature chain in `Cargo.toml`), so ITERATIONS covers all three
+    // shuttle arms. PCT_DEPTH is the `depth` argument of
+    // `shuttle::check_pct` and is only consumed by the shuttle_pct arm.
+    #[cfg(all(not(feature = "loom"), feature = "shuttle"))]
+    const ITERATIONS: usize = 16;
+    #[cfg(all(
+        not(feature = "loom"),
+        not(feature = "shuttle_dfs"),
+        feature = "shuttle_pct"
+    ))]
+    const PCT_DEPTH: usize = 3;
+    cfg_select! {
+        feature = "loom" => { loom::model(body); },
+        feature = "shuttle_dfs" => { shuttle::check_dfs(body, Some(ITERATIONS)); },
+        feature = "shuttle_pct" => { shuttle::check_pct(body, ITERATIONS, PCT_DEPTH); },
+        feature = "shuttle" => { shuttle::check_random(body, ITERATIONS); },
+        not(any(feature = "loom", feature = "shuttle")) => { body(); },
+        _ => compile_error!(
+            "stress: a model-checker feature is enabled but no dispatch \
+             arm matched. Either an explicit arm above is missing, or \
+             the `not(any(...))` default needs widening to cover the \
+             new feature.",
+        ),
+    }
+}
diff --git a/concurrency/src/sync/mod.rs b/concurrency/src/sync/mod.rs
index e70d96b4e9..2ea01b9a6f 100644
--- a/concurrency/src/sync/mod.rs
+++ b/concurrency/src/sync/mod.rs
@@ -13,24 +13,59 @@
 //!   `Weak`).
 //! * `shuttle` / `shuttle_pct` / `shuttle_dfs` features: poison-as-panic
 //!   wrapper around `shuttle::sync`. All three flavours share one
-//!   wrapper module; the feature lattice means a single
-//!   `feature = "shuttle"` check is true under every variant. The
-//!   scheduler difference is runtime-only (see `concurrency::stress`,
-//!   added in a later PR).
+//!   wrapper module; the scheduler difference is runtime-only (see
+//!   `concurrency::stress`).
 //! * `parking_lot` feature (default): zero-cost re-export of
 //!   `parking_lot`'s naked-guard locks; the production hot path.
 //!   Skipped when `_strict_provenance` is on, even if `parking_lot`
 //!   is also on, because `parking_lot_core::word_lock` uses
 //!   integer-to-pointer casts that miri's strict-provenance mode
 //!   rejects; the CI miri job exercises the fallback slot under
-//!   strict provenance, and that needs the sync surface to come from
-//!   `std::sync`.
+//!   strict provenance, and that needs the sync surface to come
+//!   from `std::sync`.
 //! * Otherwise: `std_backend` -- a thin poison-as-panic wrapper around
-//!   `std::sync`. Lets `--no-default-features` and `_strict_provenance`
-//!   builds compile without depending on `parking_lot`.
+//!   `std::sync`. Lets `--no-default-features` and
+//!   `_strict_provenance` builds compile without depending on
+//!   `parking_lot`.
+//!
+//! # Portability footguns the facade *does not* paper over
+//!
+//! The wrapped backends are observationally compatible with the
+//! production `parking_lot` surface for the things call sites
+//! actually use, but a few API details diverge in ways that matter
+//! to anyone writing a static, a model-checked test, or code that
+//! relies on `parking_lot`-specific schedules:
+//!
+//! * **`Mutex::new` / `RwLock::new` are not `const fn` under
+//!   `loom`/`shuttle*`.** loom's `Mutex::new` is plain `fn` because
+//!   each instance registers with the loom executor; shuttle's is
+//!   `const fn`, but the facade exposes the lowest common
+//!   denominator. So `static M: Mutex<T> = Mutex::new(...)` compiles
+//!   under the default and `parking_lot` backends and fails to
+//!   typecheck under the model-checker backends. Workaround for
+//!   tests that need a static: wrap the static in `OnceLock`, or
+//!   construct the `Mutex` inside the test body.
+//!
+//! * **`OnceLock` under `loom`/`shuttle*` is re-exported from
+//!   `std::sync` unchanged.** It is sound for laziness, but it uses
+//!   uninstrumented atomics inside, so the model checker does *not*
+//!   explore the orderings around `OnceLock::get_or_init`. Tests
+//!   whose correctness depends on the publication ordering of a
+//!   once-initialised cell need to model that ordering explicitly
+//!   (e.g. an `Arc<T>` + an explicit `Acquire` load on the
+//!   subscriber, both of which loom *does* model).
+//!
+//! * **`RwLock::upgradable_read` under `loom`/`shuttle*` takes an
+//!   exclusive write lock.** Sound -- every schedule it permits is
+//!   one `parking_lot` would also allow -- but lossy: the model
+//!   checker never explores the many-readers-plus-one-upgradable
+//!   schedule that `parking_lot` permits. Code whose correctness
+//!   hinges on that specific interleaving needs an explicit
+//!   `read()` then `write()` pair (which loom *can* model), or a
+//!   richer state machine in the facade.
 
-// loom takes priority so the model checker can drive its own internal state
-// (used for tests that opt loom in explicitly).
+// loom takes priority so the model checker can drive its own internal
+// state (used for tests that opt loom in explicitly).
 #[cfg(all(feature = "loom", not(feature = "silence_clippy")))]
 mod loom_backend;
 #[cfg(all(feature = "loom", not(feature = "silence_clippy")))]
@@ -44,15 +79,15 @@ pub use shuttle_backend::*;
 #[cfg(all(
     not(feature = "loom"),
     not(feature = "shuttle"),
-    not(feature = "_strict_provenance"),
     feature = "parking_lot",
+    not(feature = "_strict_provenance"),
 ))]
 mod parking_lot_backend;
 #[cfg(all(
     not(feature = "loom"),
     not(feature = "shuttle"),
-    not(feature = "_strict_provenance"),
     feature = "parking_lot",
+    not(feature = "_strict_provenance"),
 ))]
 pub use parking_lot_backend::*;
 
diff --git a/concurrency/src/sync/std_backend.rs b/concurrency/src/sync/std_backend.rs
index 263cba6e07..4313a87f50 100644
--- a/concurrency/src/sync/std_backend.rs
+++ b/concurrency/src/sync/std_backend.rs
@@ -207,7 +207,7 @@ impl<T> RwLock<T> {
     /// Acquire an upgradable read guard.
     ///
     /// std `RwLock` has no native upgradable-read; this is implemented
-    /// as an exclusive `write()`. Subsequent backends (parking_lot)
+    /// as an exclusive `write()`. Subsequent backends (`parking_lot`)
     /// will replace this with a true upgradable read; meanwhile the
     /// surface is consistent across backends, sound in all cases, and
     /// merely loses the many-readers-plus-one-upgradable schedule that
diff --git a/concurrency/src/thread/loom_scope.rs b/concurrency/src/thread/loom_scope.rs
new file mode 100644
index 0000000000..369ac1d48b
--- /dev/null
+++ b/concurrency/src/thread/loom_scope.rs
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Loom-only `thread::scope` shim.
+//!
+//! Loom 0.7 does not ship `scope`. We provide one by storing every
+//! spawned `JoinHandle` on the `Scope` itself and joining each handle
+//! from the caller of `scope()` before returning. This mirrors what
+//! `std::thread::scope` does internally (its `JoinInner::drop` joins the
+//! OS thread before signaling the main thread): every spawned thread
+//! is fully terminated -- including all its captured drops -- before
+//! `scope()` returns, so any `'scope`-bounded borrow the thread held is
+//! released *on the thread that joined*, never on the spawned thread
+//! after `'scope` has ended.
+//!
+//! The shim's safety contract therefore matches std's: spawned closures
+//! may borrow data of any lifetime that outlives the scope (`'env`).
+//! Internally we lift the closure's `'scope` lifetime to `'static` with
+//! a single `mem::transmute`, sound because of the join-before-return
+//! guarantee.
+//!
+//! The keepalive trait object stored in `ScopeInner::pending` keeps its
+//! honest `'scope` bound (unlike the closure, which is transmuted); the
+//! dropck-vs-HRTB tension this creates is resolved by wrapping
+//! `ScopeInner<'scope>` in `ManuallyDrop` so that `Scope`'s implicit
+//! drop never destructs `'scope`-bearing data, and then explicitly
+//! `ManuallyDrop::drop`ping the inner at the end of `scope()` while
+//! `'scope` is still live. See the SAFETY comments at the manual-drop
+//! site and at the closure transmute for details.
+//!
+//! Loom's `thread::spawn` is stricter than std's `spawn_unchecked` --
+//! it requires `T: 'static` for the return type as well as the closure.
+//! To accommodate, the spawned closure here is wrapped to return `()`
+//! and write the user-visible `T` into an `Arc<Mutex<Option<T>>>` that
+//! `ScopedJoinHandle::join` reads back. The wrapper itself returns `()`
+//! so loom's `'static` bound is trivially satisfied.
+//!
+//! ## Why the `result_slot` is held in three places (drop affinity)
+//!
+//!
+//! Each call to [`Scope::spawn`] produces three references to the same
+//! `Arc<Mutex<Option<T>>>`:
+//!
+//! 1. **The spawned thread's wrapper closure** writes the user's `T`
+//!    into the slot when the user closure returns.
+//! 2. **The user's [`ScopedJoinHandle`]** lets `.join()` take `T` out;
+//!    if the handle is dropped without joining, its clone simply
+//!    decrements the strong count.
+//! 3. **The `Scope`'s slot keepalive** is a type-erased third clone
+//!    held in `ScopeInner::pending`. The auto-join loop in `scope()`
+//!    walks every pending entry, joins its `JoinHandle`, then calls
+//!    `ResultKeepalive::take_payload` on the keepalive to extract the
+//!    `T` and drop it **on the main thread**. The last `Arc` clone
+//!    (which might be on the spawned thread, if loom's notify fired
+//!    before the closure's capture-drop completed) then frees an empty
+//!    `Mutex<Option<T>>` shell with nothing left to destruct.
+//!
+//! Earlier revisions tried to enforce drop affinity by asserting at
+//! teardown that `strong_count == 1`. That works for `std::thread::
+//! scope`, which synchronously waits for the spawned thread's full
+//! termination (including capture drops), but not for loom: loom's
+//! `JoinHandle::join` is satisfied by the spawned thread's `notify`,
+//! which is sequenced *after* `f()` returns but *before* the runtime
+//! has finished dropping the box that owned the closure's captures.
+//! A schedule where main reaches the assertion in that window
+//! observes `strong_count == 2`, so we drop the assertion and run the
+//! extract-and-drop on main thread explicitly.
+
+// The shim has two unsafe operations: (1) a `mem::transmute` that
+// lifts the spawned closure's `'scope` lifetime to `'static`, since
+// loom 0.7 has no `spawn_unchecked`; (2) an explicit
+// `ManuallyDrop::drop` of the inner `ScopeInner<'scope>` at the end
+// of `scope()`, which lets the keepalive trait objects keep their
+// honest `'scope` bound while dropck does not see them at `scope`'s
+// auto-drop. Both are sound because of the join-before-return
+// contract; see the per-site SAFETY comments. The crate root denies
+// `unsafe_code`, so allow it locally.
+#![allow(unsafe_code)]
+// The shim panics on internal invariant violations -- same as std's
+// `thread::scope`. The crate root denies `clippy::panic`/`expect_used`;
+// allow them locally.
+#![allow(clippy::panic, clippy::expect_used)]
+// `Scope::scope` field is a PhantomData invariance marker matching std's
+// internal layout; the name aligns with the lifetime parameter, not a
+// stylistic choice.
+#![allow(clippy::struct_field_names)]
+
+use core::marker::PhantomData;
+use core::panic::AssertUnwindSafe;
+use loom::sync::Arc;
+use loom::thread::{self, JoinHandle};
+use std::panic::{catch_unwind, resume_unwind};
+
+use crate::sync::Mutex;
+
+/// Shared slot for a `JoinHandle<()>` that may be claimed either by
+/// the user via [`ScopedJoinHandle::join`] or by [`scope`]'s
+/// auto-join loop -- whichever runs first takes it out. Both sides
+/// hold an `Arc` clone of the same `Mutex<Option<JoinHandle<()>>>`.
+type SharedJoinSlot = Arc<Mutex<Option<JoinHandle<()>>>>;
+
+/// A scope for spawning threads that may borrow non-`'static` data.
+///
+/// Created by [`scope`]. Mirrors `std::thread::Scope`.
+pub struct Scope<'scope, 'env: 'scope> {
+    inner: Mutex<core::mem::ManuallyDrop<ScopeInner<'scope>>>,
+    /// Invariance over `'scope` (matches std). Without it, `'scope`
+    /// could shrink and the unsafe lifetime launder would be unsound.
+    scope: PhantomData<&'scope mut &'scope ()>,
+    env: PhantomData<&'env mut &'env ()>,
+}
+
+/// Trait-object behind which each spawn's `Arc<Mutex<Option<T>>>`
+/// keepalive lives. Exists so the `scope()` teardown loop can call
+/// `take_payload()` to extract the `T` and drop it on the main thread,
+/// regardless of how many `Arc` clones remain on other threads.
+trait ResultKeepalive: Send {
+    /// Empty the slot with `Option::take`, dropping any contained `T`
+    /// on the caller's thread (main, in `scope()`'s teardown loop).
+    ///
+    /// Drop-affinity is enforced by this take, not by `Arc` count: any
+    /// remaining `Arc<Mutex<Option<T>>>` clones (e.g. a slow-dropping
+    /// `result_for_thread` whose owning thread has notified-but-not-
+    /// fully-exited) will then see an `Option<T>::None` and run no
+    /// `T::Drop` of their own. The last `Arc` to drop frees the empty
+    /// `Mutex<Option<T>>` shell, which has no `T` to destruct.
+    fn take_payload(&self);
+}
+
+impl<T: Send> ResultKeepalive for Arc<Mutex<Option<T>>> {
+    fn take_payload(&self) {
+        let _ = self.lock().take();
+    }
+}
+
+struct ScopeInner<'scope> {
+    /// Pairs of `(shared_handle_slot, slot_keepalive)`. The keepalive
+    /// is the third clone of each spawn's `Arc<Mutex<Option<T>>>`,
+    /// behind a small `ResultKeepalive + 'scope` trait object. The
+    /// `'scope` bound is honest: the inner `Arc<Mutex<Option<T>>>`
+    /// holds a `T: 'scope`, and the Vec keeps the trait object alive
+    /// until `scope()`'s teardown drops it (which happens before
+    /// `'scope` ends).
+    pending: Vec<(SharedJoinSlot, Box<dyn ResultKeepalive + 'scope>)>,
+}
+
+/// An owned handle to a thread spawned via [`Scope::spawn`].
+///
+/// Dropping the handle does **not** detach the thread -- the auto-join
+/// in [`scope`] still waits for it. To collect the thread's result or
+/// panic, call [`ScopedJoinHandle::join`] before [`scope`] returns.
+pub struct ScopedJoinHandle<'scope, T> {
+    /// Shared with `Scope::inner.pending`. Whoever calls
+    /// `lock().take()` first claims the handle: `ScopedJoinHandle::join`
+    /// in the user path, the teardown loop in `scope` otherwise.
+    handle_slot: SharedJoinSlot,
+    result: Arc<Mutex<Option<T>>>,
+    _scope: PhantomData<&'scope ()>,
+}
+
+impl<T> ScopedJoinHandle<'_, T> {
+    /// Wait for the spawned thread to finish and return its result.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err` with the panic payload if the spawned thread
+    /// panicked. The surrounding [`scope`] will not double-panic in
+    /// that case: an explicitly joined handle absorbs the panic.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the handle slot or result slot is empty: the join
+    /// handle was already claimed, or the wrapper closure never
+    /// deposited its result. NOTE(review): `scope`'s teardown claims
+    /// from the same slots -- confirm a late `join` cannot race it.
+    pub fn join(self) -> std::thread::Result<T> {
+        let handle = self
+            .handle_slot
+            .lock()
+            .take()
+            .expect("scoped thread handle was already taken (double join?)");
+        handle.join()?;
+        Ok(self
+            .result
+            .lock()
+            .take()
+            .expect("scoped thread did not deposit its result"))
+    }
+}
+
+/// Spawn scoped threads, joining all of them before returning.
+///
+/// See `std::thread::scope` for the API contract this shim mirrors
+/// under loom; it differs in which panic is reported (see Panics).
+///
+/// # Panics
+///
+/// Propagates any panic from `f` after all spawned threads have been
+/// joined. If `f` itself didn't panic but any spawned thread did and
+/// the panic was never absorbed by an explicit `.join()`, resumes the
+/// first such thread's panic with its original payload.
+pub fn scope<'env, F, T>(f: F) -> T
+where
+    F: for<'scope> FnOnce(&'scope Scope<'scope, 'env>) -> T,
+{
+    let scope = Scope {
+        inner: Mutex::new(core::mem::ManuallyDrop::new(ScopeInner {
+            pending: Vec::new(),
+        })),
+        scope: PhantomData,
+        env: PhantomData,
+    };
+
+    // Run `f` inside `catch_unwind` so we can still wait for spawned
+    // threads even if `f` panicked.
+    let result = catch_unwind(AssertUnwindSafe(|| f(&scope)));
+
+    // Drain pending entries. For each, try to claim the handle from
+    // the shared slot; if it's `None`, the user already joined. Then
+    // empty the result slot via `take_payload`, so any leftover `T`
+    // is dropped on this -- the main -- thread, and drop the
+    // keepalive itself.
+    //
+    // If a spawned thread panicked, capture the first panic payload so
+    // we can `resume_unwind` it at the end. Unlike `std::thread::scope`,
+    // which panics with a generic "a scoped thread panicked", this keeps
+    // the spawned thread's original assertion/panic message. We still
+    // join every handle so later panicking threads' keepalives are
+    // emptied on the main thread before we propagate.
+    //
+    // The drain is a loop, not a single take: a scoped thread can
+    // itself call `s.spawn(...)` and push a new pending entry while
+    // we're joining earlier ones. Taking `pending` once would leave
+    // those nested handles unjoined and violate the
+    // join-before-return contract the `'scope` -> `'static` lifetime
+    // transmute relies on. Loop until the queue stays empty.
+    let mut first_spawn_panic: Option<Box<dyn core::any::Any + Send + 'static>> = None;
+    loop {
+        let pending = core::mem::take(&mut scope.inner.lock().pending);
+        if pending.is_empty() {
+            break;
+        }
+        for (handle_slot, keepalive) in pending {
+            if let Some(handle) = handle_slot.lock().take()
+                && let Err(payload) = handle.join()
+                && first_spawn_panic.is_none()
+            {
+                first_spawn_panic = Some(payload);
+            }
+            // Drop-affinity: explicitly take the `Option<T>` payload out
+            // of the slot on this (the main) thread. `T::Drop` runs
+            // here, regardless of how many `Arc` clones of the slot
+            // still exist on other threads. The last `Arc` to drop
+            // (possibly on the spawned thread, in some interleavings
+            // where the spawned thread's wrapper has notified but
+            // hasn't fully released its capture) then frees the empty
+            // shell, which contains no `T` to destruct.
+            //
+            // This is stricter than std's `Drop` ordering (std relies
+            // on `JoinHandle::join()` synchronously waiting for the
+            // spawned thread to fully terminate, including capture
+            // drops). loom's `JoinHandle::join` only synchronises on
+            // the spawned thread's notify, which can fire before all
+            // captures have dropped -- so we can't rely on the Arc
+            // count being exactly 1 here.
+            keepalive.take_payload();
+            drop(keepalive);
+        }
+    }
+
+    // SAFETY: `scope.inner` wraps `ScopeInner<'scope>` in
+    // `ManuallyDrop` so that the auto-drop of `scope` (a local
+    // bound by the function block's lifetime) does not destruct
+    // `'scope`-bearing data -- that would force `'scope` to
+    // outlive `scope`, but `'scope` is fixed by the HRTB-chosen
+    // borrow at `f(&scope)` and is necessarily shorter than
+    // `scope`'s local lifetime. We are still inside `scope()`'s
+    // body here, so `'scope` is alive, the explicit
+    // `ManuallyDrop::drop` is the correct place to release the
+    // (now-emptied) `Vec` allocation. The inner is never accessed
+    // again after this point: the function only matches `result`
+    // and returns. Loom 0.7's leak check at the end of each
+    // `loom::model` iteration would otherwise flag the leaked
+    // allocation.
+    unsafe {
+        core::mem::ManuallyDrop::drop(&mut scope.inner.lock());
+    }
+
+    match result {
+        // The `f` body itself panicked. Its panic dominates (it's the
+        // outermost frame), so propagate it. A spawned panic captured
+        // in `first_spawn_panic` is silently dropped on this path,
+        // matching std's behaviour.
+        Err(e) => resume_unwind(e),
+        // No body panic, but at least one spawned thread did. Resume the
+        // first spawned panic with its original payload -- preserves
+        // assertion messages and any other diagnostic carried in the
+        // payload. (std detects the same condition via `a_thread_panicked`
+        // but panics with a fixed message instead.)
+        Ok(_) if first_spawn_panic.is_some() => {
+            resume_unwind(first_spawn_panic.expect("just checked"))
+        }
+        Ok(r) => r,
+    }
+}
+
+impl<'scope> Scope<'scope, '_> {
+    /// Spawn a thread within the scope.
+    ///
+    /// The closure may borrow data of any lifetime that outlives the
+    /// scope (i.e. `'env`). The scope guarantees the thread is joined
+    /// before [`scope`] returns, so those borrows remain valid for the
+    /// duration of the thread.
+    pub fn spawn<F, T>(&'scope self, f: F) -> ScopedJoinHandle<'scope, T>
+    where
+        F: FnOnce() -> T + Send + 'scope,
+        T: Send + 'scope,
+    {
+        let result_slot: Arc<Mutex<Option<T>>> = Arc::new(Mutex::new(None));
+        let result_for_thread = Arc::clone(&result_slot);
+        // Third clone, kept alive by the Scope itself until after the
+        // thread is joined. See module docs ("Why the result_slot is
+        // held in three places").
+        let result_keepalive = Arc::clone(&result_slot);
+
+        let wrapped = move || {
+            // Mirror std: catch the panic and resume so loom sees the
+            // thread terminate with a panic. The scope's `pending`
+            // loop will record the panic via `JoinHandle::join()`.
+            //
+            // `result_for_thread` (the spawned-thread Arc clone of the
+            // result slot) is dropped implicitly when the closure body
+            // exits.  We do not rely on that drop happening before
+            // `scope()`'s teardown runs `T::Drop`; loom's `JoinHandle::
+            // join` synchronises only on `notify`, which can fire before
+            // the closure's captures have fully been released.
+            // `scope()`'s teardown calls `take_payload` to drop the `T`
+            // on the main thread regardless of the Arc count, which is
+            // what the keepalive trait's contract guarantees.
+            match catch_unwind(AssertUnwindSafe(f)) {
+                Ok(v) => {
+                    *result_for_thread.lock() = Some(v);
+                }
+                Err(e) => resume_unwind(e),
+            }
+        };
+
+        // SAFETY: `loom::thread::spawn` requires `F: 'static` (no
+        // `spawn_unchecked` is available in loom 0.7), so we have to
+        // lifetime-launder the closure box from `'scope` to `'static`.
+        // Soundness rests on the join-before-return contract: every
+        // spawned thread is joined by `scope()`'s teardown loop before
+        // `scope()` returns. The wrapper has two `'scope`-bound
+        // captures. `f` is moved into `catch_unwind` above, so it has
+        // been called and consumed before the wrapper returns and
+        // loom emits `notify`. `result_for_thread` may outlive that
+        // `notify` (see the comment inside `wrapped`), so we do not
+        // assume it is gone once `join` returns. Instead the
+        // `take_payload` step in the teardown loop drops any leftover
+        // `T` on the main thread; whichever `Arc` clone is released
+        // last then frees a slot holding `None`, so `T::Drop` does not
+        // run on the spawned thread. The user-visible
+        // `ScopedJoinHandle` carries a `'scope` bound of its own. This
+        // is the same lifetime-launder pattern std uses for
+        // `spawn_unchecked` internally.
+        let wrapped: Box<dyn FnOnce() + Send + 'static> = unsafe {
+            core::mem::transmute::<
+                Box<dyn FnOnce() + Send + 'scope>,
+                Box<dyn FnOnce() + Send + 'static>,
+            >(Box::new(wrapped))
+        };
+
+        let join_handle = thread::spawn(wrapped);
+
+        // Shared handle slot: `scope()` and the user's
+        // `ScopedJoinHandle` both hold an `Arc` clone of the same
+        // `Mutex<Option<JoinHandle<()>>>`. Whoever calls
+        // `lock().take()` first claims the join.
+        let handle_slot: SharedJoinSlot = Arc::new(Mutex::new(Some(join_handle)));
+        let handle_for_scope = Arc::clone(&handle_slot);
+
+        // No lifetime launder needed: `ScopeInner<'scope>` carries the
+        // `'scope` parameter on the `Box<dyn ResultKeepalive + 'scope>`
+        // it stores, so the trait object's lifetime is honest. The
+        // for-all-`'scope` HRTB on `scope()`'s `F` resolves because
+        // `Scope<'scope, 'env>` is already parameterised by `'scope`
+        // and `'scope`'s invariance (the `PhantomData<&'scope mut
+        // &'scope ()>`) keeps the chosen `'scope` from shrinking.
+        let keepalive: Box<dyn ResultKeepalive + 'scope> = Box::new(result_keepalive);
+        self.inner
+            .lock()
+            .pending
+            .push((handle_for_scope, keepalive));
+
+        ScopedJoinHandle {
+            handle_slot,
+            result: result_slot,
+            _scope: PhantomData,
+        }
+    }
+}
diff --git a/concurrency/src/thread/mod.rs b/concurrency/src/thread/mod.rs
new file mode 100644
index 0000000000..d468568de9
--- /dev/null
+++ b/concurrency/src/thread/mod.rs
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Backend-routed threading primitives.
+//!
+//! Re-exports the active backend's `thread` module wholesale (`spawn`,
+//! `current`, `sleep`, `yield_now`, `JoinHandle`, `Thread`, `ThreadId`,
+//! `Builder`, ...) so call sites use one path regardless of whether
+//! they're building against `std`, `loom`, or `shuttle`.
+//!
+//! ## `thread::scope`
+//!
+//! `std::thread::scope` (stable since 1.63) and `shuttle::thread::scope`
+//! are re-exported directly. `loom` 0.7 does not provide `scope`, so we
+//! ship a local shim in [`loom_scope`] that matches the std API on top
+//! of loom's `spawn` + `JoinHandle::join` + `Arc`/`Mutex`, with a
+//! narrow `unsafe` lifetime launder (same trick std uses internally).
+//!
+//! Tests written in terms of `concurrency::thread::scope` work
+//! identically across every backend; no `Box::into_raw`/`'static`
+//! workarounds at call sites.
+
+#[cfg(not(any(feature = "loom", feature = "shuttle")))]
+pub use std::thread::*;
+
+#[cfg(all(
+    feature = "shuttle",
+    not(feature = "loom"),
+    not(feature = "silence_clippy")
+))]
+pub use shuttle::thread::*;
+
+#[cfg(all(feature = "loom", not(feature = "silence_clippy")))]
+pub use loom::thread::*;
+
+#[cfg(all(feature = "loom", not(feature = "silence_clippy")))]
+mod loom_scope;
+
+#[cfg(all(feature = "loom", not(feature = "silence_clippy")))]
+pub use loom_scope::{Scope, ScopedJoinHandle, scope};
+
+// Match the silence_clippy escape hatch in `crate::sync`: under
+// `--all-features` both loom and shuttle are enabled at once, which
+// can't pick a single backend, so route to `std::thread` to type-check
+// (never executed). NOTE(review): silence_clippy + one backend exports none.
+#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))]
+pub use std::thread::*;
diff --git a/concurrency/tests/arc_weak.rs b/concurrency/tests/arc_weak.rs
new file mode 100644
index 0000000000..6fd38fc3ab
--- /dev/null
+++ b/concurrency/tests/arc_weak.rs
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Direct coverage for the `concurrency::sync::Arc<T>` wrapper and
+//! `Weak<T>` shim.
+//!
+//! Loom 0.7 does not ship `Weak<T>` and does not give `loom::sync::Arc`
+//! an associated `downgrade` function. The crate adds both as a thin
+//! wrapper around `loom::sync::Arc` (see `concurrency/src/sync/test_facade.rs`).
+//! Because the shim is custom code -- not a re-export -- it needs its
+//! own test coverage; otherwise the only thing exercising it is
+//! `quiescent_model.rs`, which uses it as a building block and would
+//! surface failures as misbehaving QSBR tests rather than as
+//! localised shim bugs.
+//!
+//! Run under loom with:
+//!
+//! ```sh
+//! cargo test --release -p dataplane-concurrency --features loom --test arc_weak
+//! ```
+//!
+//! The tests also pass on the default and shuttle backends -- the
+//! contract is the same; only the *internals* of `Arc`/`Weak` differ.
+//! Documented quirks of the loom shim (e.g. `Weak::upgrade` succeeds
+//! even after the last `Arc` drop, `weak_count` is always `0`) are
+//! only asserted in tests gated on `feature = "loom"`, to avoid
+//! asserting on real `std::sync` / `shuttle::sync` semantics.
+//!
+//! `shuttle_pct` is opted out at file level: PCT is for biasing toward
+//! rare interleavings of concurrent code, but most of the tests in this
+//! file are protocol-level checks on `Arc` / `Weak` and either run on a
+//! single thread or only briefly spawn a helper. PCT panics on bodies
+//! that do not exercise sustained concurrency on the main thread, and
+//! the contract being tested here is identical to what the plain
+//! `shuttle` (random) variant already covers.
+
+#![cfg(not(feature = "shuttle_pct"))]
+
+// `#[concurrency::test]` is provided by `dataplane-concurrency`; alias
+// the crate so the macro path resolves inside this integration test.
+extern crate dataplane_concurrency as concurrency;
+
+use dataplane_concurrency::sync::Arc;
+use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering};
+use dataplane_concurrency::sync::{Mutex, Weak};
+use dataplane_concurrency::thread;
+
+#[concurrency::test]
+fn arc_new_strong_count_is_one() {
+    let a = Arc::new(42u32);
+    assert_eq!(Arc::strong_count(&a), 1);
+}
+
+#[concurrency::test]
+fn arc_clone_then_drop_round_trips_strong_count() {
+    let a = Arc::new(42u32);
+    let b = a.clone();
+    assert!(Arc::strong_count(&a) >= 2);
+    drop(b);
+    // After `b` drops, `a` is the only remaining strong (modulo any
+    // `Weak`-quirk count contributions, none here).
+    assert_eq!(Arc::strong_count(&a), 1);
+}
+
+#[concurrency::test]
+fn arc_ptr_eq_same_allocation_is_true() {
+    let a = Arc::new(42u32);
+    let b = a.clone();
+    assert!(Arc::ptr_eq(&a, &b));
+}
+
+#[concurrency::test]
+fn arc_ptr_eq_different_allocations_is_false() {
+    let a = Arc::new(42u32);
+    let b = Arc::new(42u32);
+    assert!(!Arc::ptr_eq(&a, &b));
+}
+
+#[concurrency::test]
+fn weak_new_upgrades_to_none() {
+    let w: Weak<u32> = Weak::new();
+    assert!(w.upgrade().is_none());
+}
+
+#[concurrency::test]
+fn arc_downgrade_then_upgrade_returns_value() {
+    let a = Arc::new(42u32);
+    let w = Arc::downgrade(&a);
+    let upgraded = w.upgrade().expect("upgrade of fresh weak should succeed");
+    assert_eq!(*upgraded, 42);
+}
+
+#[concurrency::test]
+fn arc_new_uninit_then_assume_init_round_trip() {
+    let mut uninit: Arc<core::mem::MaybeUninit<u32>> = Arc::new_uninit();
+    let slot = Arc::get_mut(&mut uninit).expect("sole strong reference");
+    slot.write(42);
+    // SAFETY: just initialised via `write`.
+    #[allow(unsafe_code)]
+    let init = unsafe { uninit.assume_init() };
+    assert_eq!(*init, 42);
+}
+
+#[concurrency::test]
+fn weak_into_raw_from_raw_round_trips() {
+    let a = Arc::new(42u32);
+    let w = Arc::downgrade(&a);
+    let raw = w.into_raw();
+    // SAFETY: `a` is still alive, so `raw` points at a live allocation.
+    #[allow(unsafe_code)]
+    let value = unsafe { *raw };
+    assert_eq!(value, 42);
+    // SAFETY: `raw` came from `Weak::into_raw`, never used elsewhere.
+    #[allow(unsafe_code)]
+    let recovered = unsafe { Weak::from_raw(raw) };
+    let upgraded = recovered.upgrade().expect("upgrade after round-trip");
+    assert_eq!(*upgraded, 42);
+}
+
+#[concurrency::test]
+fn arc_display_forwards_to_inner() {
+    let a = Arc::new(42u32);
+    assert_eq!(format!("{a}"), "42");
+}
+
+#[concurrency::test]
+fn arc_pointer_format_yields_address() {
+    let a = Arc::new(42u32);
+    // The exact representation is `0x...` on every platform we
+    // target; just check the format is non-empty and starts with `0x`.
+    let p = format!("{a:p}");
+    assert!(p.starts_with("0x"), "pointer format unexpected: {p}");
+}
+
+// ---------- documented-quirk tests (loom-only) ----------
+
+/// Under the loom shim, `Weak` holds a strong clone of the inner
+/// `loom::sync::Arc`, so `upgrade` succeeds even after every original
+/// `Arc` has dropped. This is the documented limitation explained in
+/// the module-level docs of `concurrency/src/sync/loom_backend.rs`;
+/// the test pins the behaviour so a future "real `Weak`"
+/// implementation fails this test loudly rather than silently
+/// changing semantics.
+#[cfg(feature = "loom")]
+#[concurrency::test]
+fn loom_quirk_weak_keeps_strong_alive() {
+    let a = Arc::new(42u32);
+    let w = Arc::downgrade(&a);
+    drop(a);
+    // Under real `std::sync::Weak` semantics, this would be `None`.
+    // Under the loom shim, the `Weak` itself holds a strong clone.
+    let upgraded = w.upgrade().expect("loom shim quirk: Weak keeps strong");
+    assert_eq!(*upgraded, 42);
+}
+
+// ---------- multi-thread (loom multiplies via scheduling) ----------
+
+/// Two threads each read and drop an `Arc` clone made on the main
+/// thread. Loom explores interleavings of the strong-count updates.
+#[concurrency::test]
+fn two_threads_clone_and_drop_independently() {
+    let a = Arc::new(42u32);
+    let a1 = a.clone();
+    let a2 = a.clone();
+    let h1 = thread::spawn(move || {
+        assert_eq!(*a1, 42);
+    });
+    let h2 = thread::spawn(move || {
+        assert_eq!(*a2, 42);
+    });
+    h1.join().unwrap();
+    h2.join().unwrap();
+    // Expect only `a` to remain once both threads are joined.
+    // NOTE(review): assumes `join` implies the captured clones dropped.
+    assert_eq!(Arc::strong_count(&a), 1);
+}
+
+/// A `Weak` registered in a `Mutex`-protected slot survives concurrent
+/// reader access. This is a tiny analogue of the QSBR usage pattern in
+/// `nat::stateful::apalloc`: a `Weak<T>` slot upgraded by a reader
+/// thread while another thread holds an `Arc` to the value.
+#[concurrency::test]
+fn mutex_protected_weak_slot_upgrade() {
+    let a = Arc::new(99u32);
+    let slot: Arc<Mutex<Option<Weak<u32>>>> = Arc::new(Mutex::new(Some(Arc::downgrade(&a))));
+    let slot_for_thread = Arc::clone(&slot);
+    let read = Arc::new(AtomicUsize::new(0));
+    let read_for_thread = Arc::clone(&read);
+    let h = thread::spawn(move || {
+        let guard = slot_for_thread.lock();
+        if let Some(w) = guard.as_ref()
+            && let Some(inner) = w.upgrade()
+        {
+            read_for_thread.store(*inner as usize, Ordering::SeqCst);
+        }
+    });
+    h.join().unwrap();
+    assert_eq!(read.load(Ordering::SeqCst), 99);
+    drop(a);
+}
diff --git a/concurrency/tests/quiescent_loom.rs b/concurrency/tests/quiescent_loom.rs
deleted file mode 100644
index ac8fe5038e..0000000000
--- a/concurrency/tests/quiescent_loom.rs
+++ /dev/null
@@ -1,245 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// Copyright Open Network Fabric Authors
-
-//! Loom model-checking tests for `dataplane_quiescent`.
-//!
-//! These tests run only under `--features loom`.  Standard protocol
-//! tests live in `tests/protocol.rs`; bolero properties in
-//! `tests/properties.rs`; bolero x shuttle in `tests/shuttle.rs`.
-//!
-//! Run with:
-//!
-//! ```sh
-//! cargo test --release -p dataplane-quiescent --features loom --test loom
-//! ```
-//!
-//! ## Why the `unsafe`
-//!
-//! Loom 0.7.2 doesn't expose `thread::scope`, only `thread::spawn`,
-//! which requires `'static`.  But the new lifetime-bounded API gives
-//! us a `Subscriber<'p, T>` that borrows from the `Publisher` -- there
-//! is no `'static` to satisfy `thread::spawn` with.
-//!
-//! Workaround: each `loom::model` iteration boxes a fresh `Publisher`,
-//! lifts it to `&'static` via `Box::into_raw` for the body of the
-//! iteration, and recovers the `Box` at the end (so loom's Arc-leak
-//! audit is satisfied).  The unsafe is local, narrow, and well-paired:
-//! every `into_raw` has a matching `from_raw`.
-//!
-//! `Box::leak` on its own would not work -- loom audits `Arc` cleanup
-//! at the end of every model iteration and panics on leaked clones.
-//!
-//! ## Sizing
-//!
-//! Loom explores all legal interleavings of the operations inside each
-//! `loom::model(|| { ... })` block.  Keep test bodies minimal -- each
-//! extra atomic op multiplies the search space.  Two threads with one
-//! atomic op each is roughly the right shape; "2 publishes + 2
-//! subscribers + a drop" already explodes.
-
-#![cfg(feature = "loom")]
-
-use loom::thread;
-
-use dataplane_concurrency::quiescent::{Publisher, channel};
-
-/// Run `body` with a `&'static` reference to a freshly-constructed
-/// `Publisher`.  After `body` returns, recover the `Box` and drop the
-/// `Publisher` so loom's Arc-leak audit is satisfied.
-///
-/// The `'static` lifetime is real for the duration of `body` (the
-/// `Publisher` is live in heap-allocated memory until `Box::from_raw`
-/// runs after `body`).  Caller must not retain any references derived
-/// from the `&'static Publisher` past the return of `body`.
-fn with_static_publisher<F>(body: F)
-where
-    F: FnOnce(&'static Publisher<u32>),
-{
-    let raw: *mut Publisher<u32> = Box::into_raw(Box::new(channel(0u32)));
-    // SAFETY: `raw` was just produced by `Box::into_raw` and is not
-    // freed until the matching `Box::from_raw` below.  No aliasing
-    // occurs: `body` consumes the only handle.
-    let publisher: &'static Publisher<u32> = unsafe { &*raw };
-    body(publisher);
-    // SAFETY: `body` has returned and the contract requires no
-    // outstanding references to `publisher`.  `raw` is still the
-    // unique pointer to the heap allocation.
-    drop(unsafe { Box::from_raw(raw) });
-}
-
-/// A snapshot taken after a publish must observe a value the Publisher
-/// ever stored.  Under any interleaving of `publish` vs `snapshot`, the
-/// Subscriber sees either the initial or the published value, never
-/// anything else (no torn reads, no use-after-free).
-#[test]
-fn snapshot_observes_a_legal_value() {
-    loom::model(|| {
-        with_static_publisher(|publisher| {
-            let factory = publisher.factory();
-
-            let sub_handle = thread::spawn(move || {
-                let mut sub = factory.subscriber();
-                let observed = *sub.snapshot();
-                assert!(
-                    observed == 0 || observed == 1,
-                    "Subscriber observed illegal value {observed}",
-                );
-            });
-
-            publisher.publish(1u32);
-            sub_handle.join().unwrap();
-        });
-    });
-}
-
-/// A Subscriber that takes a snapshot before the Publisher publishes,
-/// then is dropped concurrently with the Publisher's reclaim, must not
-/// deadlock and must not leave the protocol in an inconsistent state.
-#[test]
-fn subscriber_drop_during_publish_is_safe() {
-    loom::model(|| {
-        with_static_publisher(|publisher| {
-            let factory = publisher.factory();
-
-            let sub_handle = thread::spawn(move || {
-                let mut sub = factory.subscriber();
-                let _ = *sub.snapshot();
-                // Subscriber drops at end of thread; concurrent with publisher below.
-            });
-
-            publisher.publish(1u32);
-            publisher.reclaim();
-            sub_handle.join().unwrap();
-        });
-    });
-}
-
-/// A Subscriber that snapshots after `publish` returns must observe the
-/// published value, not the initial.  This pins down the
-/// publish-then-snapshot ordering.
-#[test]
-fn snapshot_after_publish_observes_published() {
-    loom::model(|| {
-        with_static_publisher(|publisher| {
-            let mut sub = publisher.factory().subscriber();
-            publisher.publish(1u32);
-            let observed = *sub.snapshot();
-            assert_eq!(
-                observed, 1,
-                "snapshot taken after publish() returns must observe the published value",
-            );
-        });
-    });
-}
-
-/// Subscriber registered before publish, snapshot taken after -- should
-/// observe the published value.  The 0-sentinel branch in
-/// `min_observed` must not turn this into a use-after-free.
-#[test]
-fn registered_then_publish_then_snapshot() {
-    loom::model(|| {
-        with_static_publisher(|publisher| {
-            let factory = publisher.factory();
-
-            let sub_handle = thread::spawn(move || {
-                let mut sub = factory.subscriber();
-                // Snapshot may race with publish.  Either way, we must see
-                // a legal value.
-                let observed = *sub.snapshot();
-                assert!(observed == 0 || observed == 1);
-            });
-
-            publisher.publish(1u32);
-            publisher.reclaim();
-            sub_handle.join().unwrap();
-        });
-    });
-}
-
-// =====================================================================
-// Drop affinity: every `Versioned` destructor must run on the
-// Publisher's thread.  This is the headline guarantee of the crate;
-// the existing tests above check legality and absence of deadlocks but
-// do not verify the drop-thread invariant under all interleavings.
-// =====================================================================
-
-/// Payload whose `Drop` records the thread on which it ran.  We use
-/// `std::sync::Mutex` for the recording slot because loom doesn't need
-/// to model contention on it (only one drop per `Versioned`, and we
-/// only care about the thread id, not the order of records).
-struct DropMarker {
-    drops: std::sync::Arc<std::sync::Mutex<Vec<loom::thread::ThreadId>>>,
-}
-
-impl Drop for DropMarker {
-    fn drop(&mut self) {
-        self.drops
-            .lock()
-            .expect("recording mutex poisoned")
-            .push(loom::thread::current().id());
-    }
-}
-
-/// Verifies the drop-affinity invariant under all loom interleavings.
-///
-/// Setup: Publisher publishes a fresh marker (the initial goes into
-/// `retired`) while a Subscriber thread snapshots and then drops.  Any
-/// interleaving of those two threads must result in **all**
-/// `Versioned` destructors running on the Publisher's thread.  In
-/// particular: the race where Subscriber's `cached = None` decrement
-/// of `Versioned`'s strong count and Publisher's `retired.clear()`
-/// decrement of the same atomic could (on weak memory) reorder, is
-/// enforced by the Acquire fence in `min_observed` after the
-/// `Arc::strong_count == 1` check.
-#[test]
-fn destructor_of_initial_runs_on_publisher_thread() {
-    loom::model(|| {
-        let drops: std::sync::Arc<std::sync::Mutex<Vec<loom::thread::ThreadId>>> =
-            std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
-
-        let initial = DropMarker {
-            drops: std::sync::Arc::clone(&drops),
-        };
-        let raw: *mut Publisher<DropMarker> = Box::into_raw(Box::new(channel(initial)));
-        // SAFETY: `raw` is the unique pointer to the heap allocation;
-        // the matching `Box::from_raw` runs after all spawned work has
-        // joined and no references derived from `publisher` survive.
-        let publisher: &'static Publisher<DropMarker> = unsafe { &*raw };
-
-        let publisher_thread = loom::thread::current().id();
-
-        // Subscriber thread: snapshot then drop.  Race against the
-        // publisher's publish/reclaim below.
-        let factory = publisher.factory();
-        let drops_for_pub = std::sync::Arc::clone(&drops);
-        let sub_handle = thread::spawn(move || {
-            let mut sub = factory.subscriber();
-            let _ = sub.snapshot();
-            // sub drops at end of thread; concurrent with publisher.
-        });
-
-        // Publisher publishes a new marker (initial goes into retired).
-        publisher.publish(DropMarker {
-            drops: drops_for_pub,
-        });
-
-        sub_handle.join().unwrap();
-        // Force a final reclaim pass so retired drains deterministically.
-        publisher.reclaim();
-
-        // SAFETY: subscriber thread has joined; no references derived
-        // from `publisher` are still in use.
-        drop(unsafe { Box::from_raw(raw) });
-
-        // Every recorded drop must have happened on the publisher
-        // (main) thread.
-        let recorded = drops.lock().expect("recording mutex poisoned");
-        for (i, t) in recorded.iter().enumerate() {
-            assert_eq!(
-                *t, publisher_thread,
-                "DropMarker {i} ran its destructor on {t:?}, \
-                 not the publisher thread {publisher_thread:?}",
-            );
-        }
-    });
-}
diff --git a/concurrency/tests/quiescent_model.rs b/concurrency/tests/quiescent_model.rs
new file mode 100644
index 0000000000..2ad62e556d
--- /dev/null
+++ b/concurrency/tests/quiescent_model.rs
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Model-checking tests for `dataplane_concurrency::quiescent`.
+//!
+//! Each test is marked `#[concurrency::test]`, which routes the body to
+//! whichever backend is active:
+//!
+//! * default -- runs the body once directly (smoke test)
+//! * `loom` -- exhaustive interleaving exploration via `loom::model`
+//! * `shuttle` / `shuttle_pct` / `shuttle_dfs` -- randomized / PCT /
+//!   DFS schedule exploration
+//!
+//! Run under loom (the headline use case) with:
+//!
+//! ```sh
+//! cargo test --release -p dataplane-concurrency --features loom --test quiescent_model
+//! ```
+//!
+//! Standard protocol tests (real OS threads + `thread::scope` + sleeps)
+//! live in `tests/quiescent_protocol.rs`; bolero property tests in
+//! `tests/quiescent_properties.rs`; bolero x shuttle in
+//! `tests/quiescent_shuttle.rs`.
+//!
+//! ## Sizing
+//!
+//! Loom explores all legal interleavings of the operations inside each
+//! invocation.  Keep test bodies minimal -- each extra atomic op
+//! multiplies the search space.  Two threads with one atomic op each is
+//! roughly the right shape; "2 publishes + 2 subscribers + a drop"
+//! already explodes.
+
+// The proc macro `#[concurrency::test]` expands to `::concurrency::stress(...)`.
+// Inside the crate's own integration tests we don't have a `concurrency` Cargo
+// alias (cargo rejects self-deps), so alias the crate manually.
+extern crate dataplane_concurrency as concurrency;
+
+use concurrency::quiescent::channel;
+use concurrency::thread;
+
+/// A snapshot racing with a publish must observe a value the Publisher
+/// actually stored.  Under any interleaving of `publish` vs `snapshot`,
+/// the Subscriber sees either the initial or the published value,
+/// never anything else (no torn reads, no use-after-free).
+#[concurrency::test]
+fn snapshot_observes_a_legal_value() {
+    let publisher = channel(0u32);
+    thread::scope(|scope| {
+        let factory = publisher.factory();
+        scope.spawn(move || {
+            let mut subscriber = factory.subscriber();
+            let observed = *subscriber.snapshot();
+            assert!(
+                matches!(observed, 0 | 1),
+                "Subscriber observed illegal value {observed}",
+            );
+        });
+        publisher.publish(1u32);
+    });
+}
+
+/// A Subscriber that snapshots around the time the Publisher
+/// publishes, and is dropped concurrently with the Publisher's
+/// reclaim, must not deadlock or leave the protocol inconsistent.
+#[concurrency::test]
+fn subscriber_drop_during_publish_is_safe() {
+    let publisher = channel(0u32);
+    thread::scope(|scope| {
+        let factory = publisher.factory();
+        scope.spawn(move || {
+            let mut subscriber = factory.subscriber();
+            let _ = *subscriber.snapshot();
+            // `subscriber` drops here, racing the publisher below.
+        });
+        publisher.publish(1u32);
+        publisher.reclaim();
+    });
+}
+
+/// Once `publish` has returned, a snapshot must observe the
+/// published value rather than the initial one.  This pins down the
+/// publish-then-snapshot ordering.
+///
+/// Skipped under `shuttle_pct`: this test is single-threaded by design
+/// and PCT specifically panics on closures that don't exercise
+/// concurrency.  The other backends accept it.
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+fn snapshot_after_publish_observes_published() {
+    let publisher = channel(0u32);
+    let mut subscriber = publisher.factory().subscriber();
+    publisher.publish(1u32);
+    let observed = *subscriber.snapshot();
+    assert_eq!(
+        observed, 1,
+        "snapshot taken after publish() returns must observe the published value",
+    );
+}
+
+/// Subscriber registration and snapshot race with publish + reclaim.
+/// Whatever the order, the snapshot must be a legal value; the
+/// 0-sentinel branch in `min_observed` must not cause a use-after-free.
+#[concurrency::test]
+fn registered_then_publish_then_snapshot() {
+    let publisher = channel(0u32);
+    thread::scope(|scope| {
+        let factory = publisher.factory();
+        scope.spawn(move || {
+            let mut subscriber = factory.subscriber();
+            // The snapshot may land before or after the publish;
+            // both the initial and the published value are legal.
+            let observed = *subscriber.snapshot();
+            assert!(observed == 0 || observed == 1);
+        });
+        publisher.publish(1u32);
+        publisher.reclaim();
+    });
+}
+
+// =====================================================================
+// Drop affinity: every `Versioned` destructor must run on the
+// Publisher's thread.  This is the headline guarantee of the crate;
+// the existing tests above check legality and absence of deadlocks but
+// do not verify the drop-thread invariant under all interleavings.
+// =====================================================================
+
+/// Payload whose `Drop` appends the dropping thread's id to `drops`.
+/// The log is a plain `std::sync::Mutex` rather than a backend
+/// primitive: the model checker doesn't need to explore contention
+/// on it (one drop per `Versioned`), and the tests only inspect
+/// which thread ids were recorded, not their order.
+struct DropMarker {
+    drops: std::sync::Arc<std::sync::Mutex<Vec<thread::ThreadId>>>,
+}
+
+impl Drop for DropMarker {
+    /// Record the id of the thread running this destructor.
+    fn drop(&mut self) {
+        let dropped_on = thread::current().id();
+        let mut log = self.drops.lock().expect("recording mutex poisoned");
+        log.push(dropped_on);
+    }
+}
+
+/// Verifies the drop-affinity invariant under all interleavings the
+/// active backend explores.
+///
+/// Setup: Publisher publishes a fresh marker (the initial goes into
+/// `retired`) while a Subscriber thread snapshots and then drops.  Any
+/// interleaving of those two threads must result in **all**
+/// `Versioned` destructors running on the Publisher's thread.  In
+/// particular: the race where Subscriber's `cached = None` decrement
+/// of `Versioned`'s strong count and Publisher's `retired.clear()`
+/// decrement of the same atomic could (on weak memory) reorder, is
+/// enforced by the Acquire fence in `min_observed` after the
+/// `Arc::strong_count == 1` check.
+#[concurrency::test]
+fn destructor_of_initial_runs_on_publisher_thread() {
+    let drops: std::sync::Arc<std::sync::Mutex<Vec<thread::ThreadId>>> =
+        std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+
+    let initial = DropMarker {
+        drops: std::sync::Arc::clone(&drops),
+    };
+    let publisher = channel(initial);
+    let publisher_thread = thread::current().id();
+
+    thread::scope(|s| {
+        // Subscriber thread: snapshot then drop.  Race against the
+        // publisher's publish/reclaim below.
+        let factory = publisher.factory();
+        s.spawn(move || {
+            let mut sub = factory.subscriber();
+            let _ = sub.snapshot();
+            // sub drops at end of thread; concurrent with publisher.
+        });
+
+        // Publisher publishes a new marker (initial goes into retired).
+        publisher.publish(DropMarker {
+            drops: std::sync::Arc::clone(&drops),
+        });
+    });
+
+    // Force a final reclaim pass so retired drains deterministically.
+    publisher.reclaim();
+    drop(publisher);
+
+    let recorded = drops.lock().expect("recording mutex poisoned");
+    // A loop over an empty log would pass vacuously; rule that out.
+    assert!(!recorded.is_empty(), "no DropMarker destructor was recorded");
+    // Every recorded drop must have happened on the publisher (main) thread.
+    for (i, t) in recorded.iter().enumerate() {
+        assert_eq!(
+            *t, publisher_thread,
+            "DropMarker {i} ran its destructor on {t:?}, \
+             not the publisher thread {publisher_thread:?}",
+        );
+    }
+}
diff --git a/concurrency/tests/scope_property.rs b/concurrency/tests/scope_property.rs
new file mode 100644
index 0000000000..a13a6a3e53
--- /dev/null
+++ b/concurrency/tests/scope_property.rs
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Bolero property test for `thread::scope`.
+//!
+//! Generates a [`Plan`] (a small number of spawned threads, each with a
+//! small number of `fetch_add` ops on a shared counter) via bolero,
+//! then runs each plan under the active backend.  Each bolero iteration
+//! is one *shape* (spawn count, per-spawn op count); under shuttle each
+//! shape gets exercised against one randomly chosen schedule.  Many
+//! bolero iterations widen both axes cheaply.
+//!
+//! This is the cheap-per-call counterpart to `tests/thread_scope.rs`'s
+//! hand-picked scenarios.  Loom-style exhaustive exploration of the
+//! shim under a large random plan would blow up; bolero x shuttle gets
+//! breadth where loom would only give depth on a tiny case.
+//!
+//! The headline property is conservation: at `scope()` return, the
+//! shared counter must equal the sum of all increments the spawned
+//! threads were instructed to perform.  If `scope()` returned without
+//! joining a thread (loom shim bug), or if any `Drop` running outside
+//! the scope clobbered the count, this assertion fires.
+//!
+//! Loom is deliberately excluded -- the search space explodes with
+//! large plans.  Use `tests/thread_scope.rs` for loom coverage.
+
+#![cfg(not(feature = "loom"))]
+
+use std::panic::RefUnwindSafe;
+
+use bolero::TypeGenerator;
+use dataplane_concurrency::sync::Arc;
+use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering};
+use dataplane_concurrency::thread;
+
+/// One spawned thread's program: a list of increments to perform on
+/// the shared counter.  `run_plan` uses at most
+/// `MAX_INCREMENTS_PER_THREAD` entries, each masked with `0x0f`.
+#[derive(Clone, Debug, TypeGenerator)]
+struct ThreadPlan {
+    increments: Vec<u8>,
+}
+
+/// A scope's program: one `ThreadPlan` per thread to spawn.  Bolero
+/// may generate an arbitrarily long `Vec<ThreadPlan>`; `run_plan`
+/// only spawns the first `MAX_THREADS` to keep search cost bounded.
+#[derive(Clone, Debug, TypeGenerator)]
+struct Plan {
+    threads: Vec<ThreadPlan>,
+}
+
+const MAX_THREADS: usize = 4;
+const MAX_INCREMENTS_PER_THREAD: usize = 4;
+
+/// Sum of every increment `run_plan` applies for `plan`: only the
+/// first `MAX_THREADS` threads and the first
+/// `MAX_INCREMENTS_PER_THREAD` increments of each are counted, and
+/// every increment is masked with `0x0f` before it is added.
+fn expected_sum(plan: &Plan) -> usize {
+    let mut total = 0usize;
+    for tp in plan.threads.iter().take(MAX_THREADS) {
+        for inc in tp.increments.iter().take(MAX_INCREMENTS_PER_THREAD) {
+            total += usize::from(*inc & 0x0f);
+        }
+    }
+    total
+}
+
+/// Run `plan` inside a single `thread::scope`, then assert that the
+/// shared counter equals `expected_sum(plan)`.
+fn run_plan(plan: &Plan) {
+    let counter = Arc::new(AtomicUsize::new(0));
+    let expected = expected_sum(plan);
+    thread::scope(|scope| {
+        for thread_plan in plan.threads.iter().take(MAX_THREADS) {
+            let shared = Arc::clone(&counter);
+            let deltas: Vec<usize> = thread_plan
+                .increments
+                .iter()
+                .take(MAX_INCREMENTS_PER_THREAD)
+                .map(|inc| usize::from(*inc & 0x0f))
+                .collect();
+            scope.spawn(move || {
+                for delta in deltas {
+                    shared.fetch_add(delta, Ordering::SeqCst);
+                }
+            });
+        }
+    });
+    let observed = counter.load(Ordering::SeqCst);
+    assert_eq!(
+        observed, expected,
+        "scope conservation violated: observed {observed} != expected {expected}",
+    );
+}
+
+const TEST_TIME: std::time::Duration = std::time::Duration::from_secs(10);
+
+/// Drive `test` with bolero-generated `Arg` values for `TEST_TIME`,
+/// handing each generated value to `test` by value (`.cloned()`).
+fn fuzz_test<Arg>(test: impl Fn(Arg) + RefUnwindSafe)
+where
+    Arg: Clone + TypeGenerator + RefUnwindSafe + std::fmt::Debug,
+{
+    let target = bolero::check!().with_type().cloned();
+    target.with_test_time(TEST_TIME).for_each(test);
+}
+
+/// Shuttle random scheduler: one randomly chosen schedule
+/// (`iterations = 1`) per generated plan.
+#[test]
+#[cfg(feature = "shuttle")]
+fn scope_conservation_under_shuttle() {
+    fuzz_test(|plan: Plan| shuttle::check_random(move || run_plan(&plan), 1));
+}
+
+/// Shuttle PCT scheduler variant of the conservation check.
+#[test]
+#[cfg(feature = "shuttle")]
+fn scope_conservation_under_shuttle_pct() {
+    fuzz_test(|plan: Plan| {
+        // PCT panics on bodies that don't exercise concurrency, so
+        // skip shapes where fewer than two of the spawned threads
+        // have any increments to perform.
+        let spawned = plan.threads.iter().take(MAX_THREADS);
+        let nontrivial = spawned.filter(|tp| !tp.increments.is_empty()).count();
+        if nontrivial >= 2 {
+            shuttle::check_pct(move || run_plan(&plan), 16, 3);
+        }
+    });
+}
+
+/// Default backend: run each generated plan directly, once.
+#[test]
+#[cfg(not(feature = "shuttle"))]
+fn scope_conservation_under_std() {
+    fuzz_test(|plan: Plan| run_plan(&plan));
+}
diff --git a/concurrency/tests/stress_dispatch.rs b/concurrency/tests/stress_dispatch.rs
new file mode 100644
index 0000000000..aebda65798
--- /dev/null
+++ b/concurrency/tests/stress_dispatch.rs
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Tests for `concurrency::stress` backend dispatch.
+//!
+//! `stress(body)` is the small router that `#[concurrency::test]`
+//! expands to: it picks one of `loom::model`,
+//! `shuttle::check_random` / `_pct` / `_dfs`, or direct `body()` based
+//! on the active backend's feature. The dispatch table lives in
+//! `concurrency/src/stress.rs`.
+//!
+//! This file pins two coarse but important properties:
+//!
+//! 1. On the default backend, `stress` invokes `body` exactly once.
+//!    There is no scheduling exploration; the call should round-trip
+//!    untouched.
+//!
+//! 2. On `loom` or `shuttle` (random scheduler), `stress` invokes
+//!    `body` more than once -- the backend explores multiple
+//!    schedules / interleavings. Exact counts depend on the backend's
+//!    internal iteration budget and can change; the test only asserts
+//!    the contract that exploration actually happens.
+//!
+//! PCT and DFS are skipped: PCT panics on test bodies that do no
+//! concurrent work *on the main thread*, and DFS returns after a
+//! single iteration in the schedule we hand it. Both are valid
+//! shuttle schedulers but stricter than `check_random`; the dispatch
+//! contract is the same for all three, so verifying it under
+//! `shuttle` + `loom` is enough.
+
+// With the `shuttle_dfs -> shuttle_pct -> shuttle` chain in
+// `Cargo.toml`, `not(feature = "shuttle_pct")` is true exactly when
+// neither PCT nor DFS is selected.
+#![cfg(not(feature = "shuttle_pct"))]
+
+extern crate dataplane_concurrency as concurrency;
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use concurrency::thread;
+
+// The invocation counter is a plain `static AtomicUsize`, not a
+// `concurrency::sync::*` primitive. Two reasons:
+//
+//   * Under loom / shuttle, `concurrency::sync::*` panics when accessed
+//     from outside the model checker's execution context (which is
+//     where the test body itself reads the counter, *after* stress
+//     returns).
+//   * A `static` is the simplest thing that works from inside and
+//     outside the body. The test counts invocations *across* the
+//     whole `stress()` call, not per-iteration, so contention is fine.
+//
+// `run_dispatch_check` resets the counter to 0 before invoking
+// `stress`, so the tests don't have hidden coupling.
+
+/// Run `concurrency::stress` once and report how often it called the body.
+fn run_dispatch_check() -> usize {
+    use concurrency::sync::{Arc, atomic as model};
+    static INVOCATIONS: AtomicUsize = AtomicUsize::new(0);
+    INVOCATIONS.store(0, Ordering::SeqCst);
+    concurrency::stress(|| {
+        INVOCATIONS.fetch_add(1, Ordering::SeqCst);
+        // Give the backend one scoped thread + one atomic op to schedule.
+        let scratch = Arc::new(model::AtomicUsize::new(0));
+        let worker_scratch = Arc::clone(&scratch);
+        thread::scope(|scope| {
+            scope.spawn(move || {
+                worker_scratch.fetch_add(1, model::Ordering::SeqCst);
+            });
+        });
+    });
+    INVOCATIONS.load(Ordering::SeqCst)
+}
+
+#[test]
+#[cfg(not(any(feature = "loom", feature = "shuttle")))]
+fn default_backend_invokes_body_exactly_once() {
+    assert_eq!(
+        run_dispatch_check(), 1,
+        "default-backend stress should invoke body exactly once",
+    );
+}
+
+#[test]
+#[cfg(any(feature = "loom", feature = "shuttle"))]
+fn model_check_backend_invokes_body_more_than_once() {
+    // Exact counts are backend-internal; only "more than one" is pinned.
+    let invocations = run_dispatch_check();
+    assert!(
+        invocations > 1,
+        "model-check backend stress should invoke body more than once \
+         (exploring schedules); observed {invocations}",
+    );
+}
+
+// `#[concurrency::test]` emits `#[::core::prelude::v1::test]` BEFORE
+// the captured `#(#attrs)*`. These two tests pin that user-supplied
+// `#[should_panic]` / `#[ignore]` attributes still attach to the
+// synthesised function -- a future macro refactor that reorders the
+// emitted attributes (or swallows them) breaks here loudly instead
+// of silently turning real test signals into no-ops.
+
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+#[should_panic(expected = "intentional")]
+fn should_panic_attribute_attaches() {
+    panic!("intentional");
+}
+
+#[cfg(not(any(feature = "loom", feature = "shuttle_pct")))]
+#[concurrency::test]
+#[ignore = "verifies #[ignore] threads through; not run by default"]
+fn ignore_attribute_attaches() {
+    panic!("test body must not run when #[ignore] is honoured");
+}
diff --git a/concurrency/tests/thread_scope.rs b/concurrency/tests/thread_scope.rs
new file mode 100644
index 0000000000..6586b3fcdc
--- /dev/null
+++ b/concurrency/tests/thread_scope.rs
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Direct coverage for `concurrency::thread::scope` -- the loom shim
+//! in particular, but the tests pass under every backend.
+//!
+//! Loom 0.7 does not ship `thread::scope`. The crate provides one in
+//! `concurrency/src/thread/loom_scope.rs` built on `loom::spawn` plus
+//! an `Arc<Mutex<Option<T>>>` keepalive pattern that preserves the
+//! drop-affinity guarantee `std::thread::scope` offers.
+//!
+//! The shim is exercised indirectly by `tests/quiescent_model.rs`, but
+//! those tests would surface failures as quiescent-protocol bugs rather
+//! than as localised shim bugs. The tests in this file pin the
+//! `thread::scope` contract itself so a future regression in the shim
+//! fails here loudly and at the right layer.
+//!
+//! The same source runs under every backend via `#[concurrency::test]`,
+//! and on the default and shuttle backends it exercises the *real*
+//! `std::thread::scope` / `shuttle::thread::scope` -- which is the
+//! point: the contract is the same; only the *internals* differ.
+//!
+//! Run under loom (the headline use case) with:
+//!
+//! ```sh
+//! cargo test --release -p dataplane-concurrency --features loom --test thread_scope
+//! ```
+
+extern crate dataplane_concurrency as concurrency;
+
+use concurrency::sync::Arc;
+use concurrency::sync::atomic::{AtomicUsize, Ordering};
+use concurrency::thread;
+
+// Several tests below have the spawn-and-wait shape ("main spawns,
+// joins via the implicit auto-join, reads only after scope returns"),
+// which PCT treats as "the main thread did no concurrent work" and
+// panics on, so they are compiled out under `shuttle_pct` -- as is
+// `snapshot_after_publish_observes_published` in `quiescent_model.rs`.
+// Tests with two or more spawns issuing atomic ops (e.g.
+// `multiple_spawns_all_join_before_return`) are PCT-compatible.
+
+/// `scope()` hands the closure's return value back to the caller.
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+fn scope_returns_body_value() {
+    let returned = thread::scope(|_scope| 42u32);
+    assert_eq!(returned, 42);
+}
+
+/// A single spawned thread is joined before `scope()` returns, so the
+/// increment it performed is visible to the caller afterwards.
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+fn single_spawn_joins_before_return() {
+    let hits = Arc::new(AtomicUsize::new(0));
+    let worker_hits = Arc::clone(&hits);
+    thread::scope(|scope| {
+        scope.spawn(move || {
+            worker_hits.fetch_add(1, Ordering::SeqCst);
+        });
+    });
+    assert_eq!(hits.load(Ordering::SeqCst), 1);
+}
+
+/// Multiple spawned threads all join before `scope()` returns.
+#[concurrency::test]
+fn multiple_spawns_all_join_before_return() {
+    let counter = Arc::new(AtomicUsize::new(0));
+    thread::scope(|scope| {
+        // Two workers, each spawned with its own clone of the counter
+        // and each contributing exactly one increment.
+        for _ in 0..2 {
+            let worker_counter = Arc::clone(&counter);
+            scope.spawn(move || {
+                worker_counter.fetch_add(1, Ordering::SeqCst);
+            });
+        }
+    });
+    assert_eq!(counter.load(Ordering::SeqCst), 2);
+}
+
+/// `ScopedJoinHandle::join` yields the value the spawned closure returned.
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+fn explicit_join_returns_value() {
+    thread::scope(|scope| {
+        let handle = scope.spawn(|| 99u32);
+        let joined = handle.join().expect("spawned thread did not panic");
+        assert_eq!(joined, 99);
+    });
+}
+
+/// Spawned closures may borrow data that is not `'static`, as long as
+/// it outlives the scope -- the headline `std::thread::scope`
+/// guarantee. Under loom this is the shim's `mem::transmute` at work.
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+fn spawn_can_borrow_from_enclosing_scope() {
+    let sink = Arc::new(AtomicUsize::new(0));
+    // `local` lives in this function's stack frame. The spawned
+    // closure only captures a reference to it, which plain
+    // `thread::spawn` would reject for lacking a `'static` bound.
+    let local = 7u32;
+    let borrowed = &local;
+    thread::scope(|scope| {
+        let worker_sink = Arc::clone(&sink);
+        scope.spawn(move || {
+            worker_sink.store(*borrowed as usize, Ordering::SeqCst);
+        });
+    });
+    assert_eq!(sink.load(Ordering::SeqCst), 7);
+}
+
+/// Two spawns in one scope write distinct values to distinct atomics;
+/// both values are readable once `scope()` returns. Loom explores all
+/// interleavings of the two stores, and in each of them both writes
+/// are visible because both threads are joined before `scope` returns.
+#[concurrency::test]
+fn two_spawns_independent_writes() {
+    let first = Arc::new(AtomicUsize::new(0));
+    let second = Arc::new(AtomicUsize::new(0));
+    thread::scope(|scope| {
+        let first_writer = Arc::clone(&first);
+        scope.spawn(move || {
+            first_writer.store(1, Ordering::SeqCst);
+        });
+        let second_writer = Arc::clone(&second);
+        scope.spawn(move || {
+            second_writer.store(2, Ordering::SeqCst);
+        });
+    });
+    assert_eq!(first.load(Ordering::SeqCst), 1);
+    assert_eq!(second.load(Ordering::SeqCst), 2);
+}
+
+/// A scoped thread may itself call `spawn` on the parent scope,
+/// pushing new entries onto the scope's `pending` queue after the
+/// parent thread has already entered the teardown drain. The shim
+/// must keep draining until the queue stays empty across a full
+/// pass; otherwise the nested spawn's `JoinHandle` is leaked and the
+/// `'scope` -> `'static` transmute is unsound (the closure outlives
+/// `'scope`).
+#[concurrency::test]
+fn nested_scoped_spawn_is_joined() {
+    let outer_done = Arc::new(AtomicUsize::new(0));
+    let inner_done = Arc::new(AtomicUsize::new(0));
+    thread::scope(|scope| {
+        let outer_flag = Arc::clone(&outer_done);
+        let inner_flag = Arc::clone(&inner_done);
+        scope.spawn(move || {
+            // Re-enter `scope` from inside an already-spawned scoped
+            // thread. The handle for this inner spawn is registered
+            // in the same `Scope`'s `pending` list, but it can land
+            // there after the parent thread has already taken a
+            // snapshot of `pending` to drain. The shim's teardown
+            // must keep looping until `pending` is empty across a
+            // full pass.
+            scope.spawn(move || {
+                inner_flag.fetch_add(1, Ordering::SeqCst);
+            });
+            outer_flag.fetch_add(1, Ordering::SeqCst);
+        });
+    });
+    let outer_runs = outer_done.load(Ordering::SeqCst);
+    assert_eq!(
+        outer_runs, 1,
+        "outer scoped thread did not run to completion before scope returned",
+    );
+    let inner_runs = inner_done.load(Ordering::SeqCst);
+    assert_eq!(
+        inner_runs, 1,
+        "nested scoped thread did not run to completion before scope returned",
+    );
+}
+
+/// A value moved into a spawned closure has its `Drop::drop` run no
+/// later than when the spawned thread is joined -- i.e. before
+/// `scope()` returns. Pinned via an `AtomicUsize` that the payload's
+/// `Drop` impl increments.
+#[cfg(not(feature = "shuttle_pct"))]
+#[concurrency::test]
+fn moved_value_drop_runs_before_scope_returns() {
+    struct Bump(Arc<AtomicUsize>);
+    impl Drop for Bump {
+        fn drop(&mut self) {
+            self.0.fetch_add(1, Ordering::SeqCst);
+        }
+    }
+    let drop_count = Arc::new(AtomicUsize::new(0));
+    thread::scope(|scope| {
+        let payload = Bump(Arc::clone(&drop_count));
+        scope.spawn(move || {
+            // Moved into the body; dropped when the body finishes.
+            let _keep = payload;
+        });
+    });
+    assert_eq!(drop_count.load(Ordering::SeqCst), 1);
+}
diff --git a/dpdk-sys/build.rs b/dpdk-sys/build.rs
index e0c5a219c1..d204a7562d 100644
--- a/dpdk-sys/build.rs
+++ b/dpdk-sys/build.rs
@@ -93,6 +93,7 @@ fn main() {
         "rte_hash",
         "rte_rcu",
         "rte_ring",
+        "rte_acl",
         "rte_eal",
         "rte_argparse",
         "rte_kvargs",
diff --git a/dpdk/Cargo.toml b/dpdk/Cargo.toml
index 32b186c177..fdee013955 100644
--- a/dpdk/Cargo.toml
+++ b/dpdk/Cargo.toml
@@ -11,6 +11,7 @@ serde = ["dep:serde"]
 
 [dependencies]
 
+concurrency = { workspace = true }
 dpdk-sys = { workspace = true }
 errno = { workspace = true }
 net = { workspace = true }
@@ -21,3 +22,9 @@ tracing = { workspace = true, features = ["attributes"] }
 
 [build-dependencies]
 dpdk-sysroot-helper = { workspace = true }
+
+[dev-dependencies]
+id = { workspace = true }
+
+bolero = { workspace = true, default-features = false, features = ["std"] }
+nix = { workspace = true, features = ["sched"] }
diff --git a/dpdk/src/acl/classify.rs b/dpdk/src/acl/classify.rs
new file mode 100644
index 0000000000..37539ee6f1
--- /dev/null
+++ b/dpdk/src/acl/classify.rs
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! ACL classification algorithm selection.
+//!
+//! DPDK provides multiple SIMD-accelerated implementations of its ACL classification engine.
+//! The [`ClassifyAlgorithm`] enum exposes these as a safe Rust type that can be used with
+//! [`AclContext::classify_with_algorithm`][super::context::AclContext] or
+//! [`AclContext::set_default_algorithm`][super::context::AclContext].
+//!
+//! In most cases [`ClassifyAlgorithm::Default`] is the right choice -- DPDK will automatically
+//! select the best implementation for the current CPU at build time.  Explicit selection is useful
+//! for benchmarking or for targeting a specific code path.
+
+use core::fmt::{self, Display, Formatter};
+
+// ---------------------------------------------------------------------------
+// ClassifyAlgorithm
+// ---------------------------------------------------------------------------
+
+/// SIMD implementation to use for ACL classification.
+///
+/// Maps 1:1 to the `RTE_ACL_CLASSIFY_*` constants in
+/// [`rte_acl_classify_alg`][mod@dpdk_sys::rte_acl_classify_alg].
+///
+/// # Platform support
+///
+/// Not every variant is available on every CPU.  Requesting an unsupported algorithm will result
+/// in an error from [`rte_acl_classify_alg`][fn@dpdk_sys::rte_acl_classify_alg] or
+/// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify].
+/// [`Default`][ClassifyAlgorithm::Default] is always available and is recommended unless you have
+/// a specific reason to select a particular implementation.
+#[repr(u32)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Default)]
+pub enum ClassifyAlgorithm {
+    /// Let DPDK choose the best available implementation for the current CPU.
+    ///
+    /// This is almost always what you want.
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_DEFAULT`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_DEFAULT].
+    ///
+    /// # Asymmetry between the two DPDK entry points
+    ///
+    /// `Default` carries a different meaning in each of DPDK's two algorithm-selection paths:
+    ///
+    /// - [`rte_acl_set_ctx_classify(ctx, DEFAULT)`][dpdk_sys::rte_acl_set_ctx_classify]
+    ///   expands `DEFAULT` to the best available implementation on the current
+    ///   CPU (this is the "DPDK choose best" semantics).
+    /// - [`rte_acl_classify_alg(ctx, ..., DEFAULT)`][fn@dpdk_sys::rte_acl_classify_alg]
+    ///   indexes table slot 0 in the dispatch table, which is the scalar
+    ///   implementation -- *not* "DPDK choose best".
+    ///
+    /// To make `Default` mean the same thing through either Rust entry point,
+    /// [`AclContext::classify_with_algorithm`][super::context::AclContext::classify_with_algorithm]
+    /// special-cases `Default` to dispatch via
+    /// [`rte_acl_classify`][dpdk_sys::rte_acl_classify] (which uses the
+    /// context's currently-set algorithm) instead of through
+    /// `rte_acl_classify_alg`.  Use [`Scalar`][ClassifyAlgorithm::Scalar]
+    /// explicitly if you want the scalar implementation.
+    #[default]
+    Default = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_DEFAULT,
+
+    /// Portable scalar (non-SIMD) implementation.
+    ///
+    /// Available on all platforms.  Useful as a baseline for benchmarks.
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_SCALAR`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SCALAR].
+    Scalar = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SCALAR,
+
+    /// SSE 4.1 vectorized implementation.
+    ///
+    /// Requires x86-64 SSE 4.1 support.
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_SSE`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SSE].
+    Sse = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SSE,
+
+    /// AVX2 vectorized implementation.
+    ///
+    /// Requires x86-64 AVX2 support.
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_AVX2`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX2].
+    Avx2 = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX2,
+
+    /// ARM NEON vectorized implementation.
+    ///
+    /// Requires AArch64 NEON support.
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_NEON`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_NEON].
+    Neon = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_NEON,
+
+    /// PowerPC AltiVec vectorized implementation.
+    ///
+    /// Requires PowerPC AltiVec / VMX support.
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_ALTIVEC`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_ALTIVEC].
+    Altivec = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_ALTIVEC,
+
+    /// AVX-512 vectorized implementation processing 16 flows in parallel.
+    ///
+    /// Requires x86-64 AVX-512 support (specifically AVX-512BW).
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_AVX512X16`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X16].
+    Avx512x16 = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X16,
+
+    /// AVX-512 vectorized implementation processing 32 flows in parallel.
+    ///
+    /// Requires x86-64 AVX-512 support (specifically AVX-512BW).
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_CLASSIFY_AVX512X32`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X32].
+    Avx512x32 = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X32,
+}
+
+impl ClassifyAlgorithm {
+    /// Convert to the raw `u32` discriminant value expected by the DPDK C API.
+    #[must_use]
+    #[inline]
+    pub const fn as_u32(self) -> u32 {
+        self as u32
+    }
+
+    /// Attempt to parse a raw `u32` into a [`ClassifyAlgorithm`].
+    ///
+    /// Returns `None` if the value does not correspond to a known algorithm.
+    /// See also the [`TryFrom<u32>`] impl, which is the same operation framed as the
+    /// idiomatic conversion trait.
+    #[must_use]
+    pub const fn from_u32(value: u32) -> Option<Self> {
+        match value {
+            x if x == Self::Default as u32 => Some(Self::Default),
+            x if x == Self::Scalar as u32 => Some(Self::Scalar),
+            x if x == Self::Sse as u32 => Some(Self::Sse),
+            x if x == Self::Avx2 as u32 => Some(Self::Avx2),
+            x if x == Self::Neon as u32 => Some(Self::Neon),
+            x if x == Self::Altivec as u32 => Some(Self::Altivec),
+            x if x == Self::Avx512x16 as u32 => Some(Self::Avx512x16),
+            x if x == Self::Avx512x32 as u32 => Some(Self::Avx512x32),
+            _ => None,
+        }
+    }
+
+    /// Returns `true` for the x86-64 specific variants (SSE, AVX2, and both AVX-512 widths).
+    #[must_use]
+    pub const fn is_x86_64(&self) -> bool {
+        matches!(
+            self,
+            Self::Sse | Self::Avx2 | Self::Avx512x16 | Self::Avx512x32
+        )
+    }
+
+    /// Returns `true` for the AArch64 specific variant (NEON).
+    #[must_use]
+    pub const fn is_aarch64(&self) -> bool {
+        matches!(self, Self::Neon)
+    }
+
+    /// Returns `true` for the PowerPC specific variant (AltiVec).
+    #[must_use]
+    pub const fn is_powerpc(&self) -> bool {
+        matches!(self, Self::Altivec)
+    }
+
+    /// Returns `true` for the platform-independent variants (`Default` and `Scalar`).
+    #[must_use]
+    pub const fn is_portable(&self) -> bool {
+        matches!(self, Self::Default | Self::Scalar)
+    }
+}
+
+impl Display for ClassifyAlgorithm {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            Self::Default => "Default",
+            Self::Scalar => "Scalar",
+            Self::Sse => "SSE",
+            Self::Avx2 => "AVX2",
+            Self::Neon => "NEON",
+            Self::Altivec => "AltiVec",
+            Self::Avx512x16 => "AVX-512 (x16)",
+            Self::Avx512x32 => "AVX-512 (x32)",
+        })
+    }
+}
+
+impl From<ClassifyAlgorithm> for dpdk_sys::rte_acl_classify_alg::Type {
+    #[inline]
+    fn from(alg: ClassifyAlgorithm) -> Self {
+        alg.as_u32()
+    }
+}
+
+/// Error returned by [`ClassifyAlgorithm::try_from`]; carries the rejected raw discriminant.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
+#[error("unknown rte_acl_classify_alg discriminant {0}")]
+pub struct UnknownClassifyAlgorithm(pub u32);
+
+impl TryFrom<u32> for ClassifyAlgorithm {
+    type Error = UnknownClassifyAlgorithm;
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        Self::from_u32(value).ok_or(UnknownClassifyAlgorithm(value))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Compile-time assertions
+// ---------------------------------------------------------------------------
+
+/// Compile-time guard: each discriminant equals its DPDK constant (true by construction above).
+const _: () = {
+    use dpdk_sys::rte_acl_classify_alg::*;
+
+    assert!(ClassifyAlgorithm::Default as u32 == RTE_ACL_CLASSIFY_DEFAULT);
+    assert!(ClassifyAlgorithm::Scalar as u32 == RTE_ACL_CLASSIFY_SCALAR);
+    assert!(ClassifyAlgorithm::Sse as u32 == RTE_ACL_CLASSIFY_SSE);
+    assert!(ClassifyAlgorithm::Avx2 as u32 == RTE_ACL_CLASSIFY_AVX2);
+    assert!(ClassifyAlgorithm::Neon as u32 == RTE_ACL_CLASSIFY_NEON);
+    assert!(ClassifyAlgorithm::Altivec as u32 == RTE_ACL_CLASSIFY_ALTIVEC);
+    assert!(ClassifyAlgorithm::Avx512x16 as u32 == RTE_ACL_CLASSIFY_AVX512X16);
+    assert!(ClassifyAlgorithm::Avx512x32 as u32 == RTE_ACL_CLASSIFY_AVX512X32);
+};
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// All known discriminants -- the universe `from_u32` must accept and the
+    /// universe `as_u32` round-trips through.
+    const KNOWN: &[ClassifyAlgorithm] = &[
+        ClassifyAlgorithm::Default,
+        ClassifyAlgorithm::Scalar,
+        ClassifyAlgorithm::Sse,
+        ClassifyAlgorithm::Avx2,
+        ClassifyAlgorithm::Neon,
+        ClassifyAlgorithm::Altivec,
+        ClassifyAlgorithm::Avx512x16,
+        ClassifyAlgorithm::Avx512x32,
+    ];
+
+    /// `Default` must stay discriminant 0 and be the `Default::default()` value.
+    #[test]
+    fn default_is_zero() {
+        assert_eq!(ClassifyAlgorithm::Default.as_u32(), 0);
+        assert_eq!(ClassifyAlgorithm::default(), ClassifyAlgorithm::Default);
+    }
+
+    /// Every variant in `KNOWN` survives `as_u32` -> `from_u32` unchanged.  Uses the
+    /// shared list so this test cannot drift from the property tests below.
+    #[test]
+    fn round_trip_all_variants() {
+        for &variant in KNOWN {
+            let raw = variant.as_u32();
+            let parsed = ClassifyAlgorithm::from_u32(raw);
+            assert_eq!(parsed, Some(variant), "round-trip failed for {variant}");
+        }
+    }
+
+    /// Values that match no discriminant are rejected with `None`.
+    #[test]
+    fn from_u32_rejects_unknown() {
+        assert_eq!(ClassifyAlgorithm::from_u32(99), None);
+        assert_eq!(ClassifyAlgorithm::from_u32(u32::MAX), None);
+    }
+
+    /// Pins the exact human-readable label produced by `Display` for each variant.
+    #[test]
+    fn display_all_variants() {
+        let display_strings = [
+            (ClassifyAlgorithm::Default, "Default"),
+            (ClassifyAlgorithm::Scalar, "Scalar"),
+            (ClassifyAlgorithm::Sse, "SSE"),
+            (ClassifyAlgorithm::Avx2, "AVX2"),
+            (ClassifyAlgorithm::Neon, "NEON"),
+            (ClassifyAlgorithm::Altivec, "AltiVec"),
+            (ClassifyAlgorithm::Avx512x16, "AVX-512 (x16)"),
+            (ClassifyAlgorithm::Avx512x32, "AVX-512 (x32)"),
+        ];
+        for (variant, expected) in display_strings {
+            assert_eq!(format!("{variant}"), expected);
+        }
+    }
+
+    /// Each variant reports its expected platform class, and exactly one class.
+    #[test]
+    fn platform_classification() {
+        assert!(ClassifyAlgorithm::Default.is_portable());
+        assert!(ClassifyAlgorithm::Scalar.is_portable());
+
+        assert!(ClassifyAlgorithm::Sse.is_x86_64());
+        assert!(ClassifyAlgorithm::Avx2.is_x86_64());
+        assert!(ClassifyAlgorithm::Avx512x16.is_x86_64());
+        assert!(ClassifyAlgorithm::Avx512x32.is_x86_64());
+
+        assert!(ClassifyAlgorithm::Neon.is_aarch64());
+        assert!(ClassifyAlgorithm::Altivec.is_powerpc());
+
+        // Every variant belongs to exactly one platform class, so the positive
+        // assertions above also rule out every cross-classification.
+        for variant in KNOWN {
+            let memberships = [
+                variant.is_portable(),
+                variant.is_x86_64(),
+                variant.is_aarch64(),
+                variant.is_powerpc(),
+            ];
+            let count = memberships.iter().filter(|&&hit| hit).count();
+            assert_eq!(count, 1, "{variant:?} must be in exactly one platform class");
+        }
+    }
+
+    /// The `From` conversion yields the matching raw DPDK constant.
+    #[test]
+    fn into_dpdk_type() {
+        let alg = ClassifyAlgorithm::Avx2;
+        let raw: dpdk_sys::rte_acl_classify_alg::Type = alg.into();
+        assert_eq!(raw, dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX2);
+    }
+
+    /// Property: for every `u32`, `from_u32` either round-trips through `as_u32`
+    /// (when the value is a known discriminant) or rejects with `None` (when it
+    /// is not).  Generalises the hand-rolled `round_trip_all_variants` test over
+    /// the entire `u32` domain.
+    #[test]
+    fn from_u32_round_trip_property() {
+        bolero::check!().with_type::<u32>().for_each(
+            |value: &u32| match ClassifyAlgorithm::from_u32(*value) {
+                Some(alg) => assert_eq!(
+                    alg.as_u32(),
+                    *value,
+                    "from_u32({value}) -> {alg:?} but {alg:?}.as_u32() = {}",
+                    alg.as_u32()
+                ),
+                None => {
+                    for variant in KNOWN {
+                        assert_ne!(
+                            variant.as_u32(),
+                            *value,
+                            "from_u32({value}) returned None but {variant:?} has that discriminant"
+                        );
+                    }
+                }
+            },
+        );
+    }
+
+    /// Property: `TryFrom<u32>` matches `from_u32` exactly.
+    #[test]
+    fn try_from_matches_from_u32() {
+        bolero::check!().with_type::<u32>().for_each(|value: &u32| {
+            let opt = ClassifyAlgorithm::from_u32(*value);
+            let res = ClassifyAlgorithm::try_from(*value).ok();
+            assert_eq!(opt, res);
+        });
+    }
+}
diff --git a/dpdk/src/acl/config.rs b/dpdk/src/acl/config.rs
new file mode 100644
index 0000000000..b446c303dc
--- /dev/null
+++ b/dpdk/src/acl/config.rs
@@ -0,0 +1,1734 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! ACL configuration types.
+//!
+//! This module provides safe, validated configuration types for the two main ACL setup calls:
+//!
+//! - [`AclCreateParams`] -- parameters for creating an ACL context
+//!   ([`rte_acl_create`][dpdk_sys::rte_acl_create]).
+//! - [`AclBuildConfig`]`<N>` -- parameters for compiling rules into runtime lookup structures
+//!   ([`rte_acl_build`][dpdk_sys::rte_acl_build]).
+//!
+//! Following the project convention of validating inputs at the boundary, both types perform
+//! validation at construction time so that downstream code can assume the configuration is valid.
+
+use core::ffi::CStr;
+use core::fmt::{self, Display};
+use core::marker::PhantomData;
+use core::num::NonZero;
+
+use std::ffi::CString;
+
+use tracing::debug;
+
+use crate::socket::SocketId;
+
+use super::error::InvalidAclName;
+use super::field::FieldDef;
+use super::rule::Rule;
+
+// ---------------------------------------------------------------------------
+// AclCreateParams
+// ---------------------------------------------------------------------------
+
+/// Validated parameters for creating an ACL context with `N` fields per rule.
+///
+/// This is the safe Rust equivalent of [`rte_acl_param`][dpdk_sys::rte_acl_param].
+/// The name is validated once at construction and kept as a [`CString`] that FFI calls borrow.
+///
+/// # Why the const generic is on the type, not the constructor
+///
+/// `N` lives on the type so that
+/// [`AclContext::<N>::new`][super::context::AclContext::new] can require
+/// `AclCreateParams<N>` with the **same** `N`.  Erasing `N` after construction
+/// would let `AclContext::<3>::new(AclCreateParams::<5>::new(...))` compile
+/// while DPDK strides through rules at `rule_size = size_of::<Rule<5>>()` over
+/// `Rule<3>`-sized slots -- the exact OOB read the const generic is meant to
+/// rule out.  Keeping `N` on the type closes that gap statically and is
+/// consistent with how [`AclBuildConfig<N>`] is parameterised.
+///
+/// # Construction
+///
+/// Use [`AclCreateParams::<N>::new`][AclCreateParams::new] to create a validated instance.
+///
+/// ```ignore
+/// let params = AclCreateParams::<5>::new("my_acl", SocketId::ANY, NonZero::new(1024).unwrap())?;
+/// ```
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AclCreateParams<const N: usize> {
+    /// Validated ACL context name (ASCII, non-empty, no null bytes, within length limit).
+    name: CString,
+    /// NUMA socket on which to allocate the context's memory.
+    socket_id: SocketId,
+    /// Maximum number of rules this context can hold.  Non-zero: a context that
+    /// cannot hold any rules is useless and `rte_acl_create` rejects it with
+    /// `EINVAL`.
+    max_rule_num: NonZero<u32>,
+    /// Size of each rule in bytes -- equal to
+    /// [`Rule::<N>::RULE_SIZE`][Rule::RULE_SIZE].  Stored as
+    /// [`NonZero<u32>`] because `N > 0` implies `size_of::<Rule<N>>() > 0`,
+    /// and a zero `rule_size` would be rejected by DPDK with `EINVAL`.
+    rule_size: NonZero<u32>,
+    /// Carries `N` on the type without taking up space.
+    _phantom: PhantomData<[(); N]>,
+}
+
+/// The maximum length (in bytes, **excluding** the null terminator) of an ACL context name.
+///
+/// DPDK's [`RTE_ACL_NAMESIZE`][dpdk_sys::RTE_ACL_NAMESIZE] includes the null terminator, so the
+/// usable string length is one less (`saturating_sub` avoids underflow if the constant were 0).
+pub const MAX_ACL_NAME_LEN: usize = (dpdk_sys::RTE_ACL_NAMESIZE as usize).saturating_sub(1);
+
+impl<const N: usize> AclCreateParams<N> {
+    /// Compile-time guard: `N == 0` is rejected here so that
+    /// [`AclContext::<0, _>`][super::context::AclContext] is unconstructable
+    /// via the public API.  Forced to evaluate in `new` via a let-binding.
+    const _CHECK_N_NONZERO: () = assert!(N > 0, "AclCreateParams requires N > 0");
+
+    /// Compile-time guard: `N` must not exceed
+    /// [`MAX_FIELDS`][super::config::MAX_FIELDS] (DPDK's
+    /// `RTE_ACL_MAX_FIELDS` = 64).  Larger `N` would also be rejected by
+    /// [`AclBuildConfig::new`][super::config::AclBuildConfig::new], but
+    /// must be rejected **here** first: `Rule::<N>::RULE_SIZE`
+    /// computes `size_of::<Rule<N>>() as u32`, and for very large `N`
+    /// the cast can wrap to `0`, after which the `NonZero::new` check in
+    /// `new` would hit its `unreachable!()` arm and panic.  Capping `N` at
+    /// `MAX_FIELDS` keeps `size_of::<Rule<N>>()` well under `u32::MAX`
+    /// (it is at most 16 + 16 * 64 = 1040 bytes), so the cast is exact
+    /// and non-zero.
+    const _CHECK_N_FITS_U32_RULE_SIZE: () = assert!(
+        N <= MAX_FIELDS,
+        "AclCreateParams requires N <= RTE_ACL_MAX_FIELDS (64); larger N would \
+         truncate size_of::<Rule<N>>() during the u32 cast and risk UB."
+    );
+
+    /// Create validated ACL creation parameters.
+    ///
+    /// `N` (on the type) must match the number of [`FieldDef`] entries that
+    /// will be used when building the context, as well as the number of
+    /// fields in every [`Rule<N>`][Rule] added to the context.  It is used
+    /// here to compute the `rule_size` that DPDK requires at creation time.
+    ///
+    /// # Arguments
+    ///
+    /// * `name` -- human-readable name for the context.  Must be non-empty ASCII without null
+    ///   bytes, at most [`MAX_ACL_NAME_LEN`] bytes long.
+    /// * `socket_id` -- the NUMA socket to allocate memory on.  Use [`SocketId::ANY`] if you don't
+    ///   have a preference.
+    /// * `max_rule_num` -- the maximum number of rules this context will hold.
+    ///   Non-zero by type; a context that cannot hold any rules has no use and
+    ///   DPDK rejects it with `EINVAL`.
+    ///
+    /// # Compile-time checks
+    ///
+    /// `N == 0` is rejected by `_CHECK_N_NONZERO`; `N > MAX_FIELDS` is
+    /// rejected by `_CHECK_N_FITS_U32_RULE_SIZE`.  Both are evaluated at
+    /// monomorphisation time via let-bindings in this function.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`InvalidAclName`] if the name fails validation.
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(name), fields(name = name.as_ref()))]
+    pub fn new(
+        name: impl AsRef<str>,
+        socket_id: SocketId,
+        max_rule_num: NonZero<u32>,
+    ) -> Result<Self, InvalidAclName> {
+        // Force evaluation of both const assertions for this monomorphisation.
+        let () = Self::_CHECK_N_NONZERO;
+        let () = Self::_CHECK_N_FITS_U32_RULE_SIZE;
+
+        let name = Self::validate_name(name.as_ref())?;
+        // `Rule::<N>::RULE_SIZE == size_of::<Rule<N>>() as u32`.  The
+        // two const assertions above guarantee `0 < N <= MAX_FIELDS`,
+        // so `size_of::<Rule<N>>()` is at most 1040 bytes -- well under
+        // `u32::MAX`, and certainly non-zero.  The `unreachable!()`
+        // arm is therefore dead; we surface it as a panic rather than
+        // `unsafe { new_unchecked }` so that a broken invariant
+        // faults loudly instead of being undefined behaviour.
+        let rule_size = match NonZero::new(Rule::<N>::RULE_SIZE) {
+            Some(nz) => nz,
+            None => unreachable!(),
+        };
+        debug!(
+            "Created ACL params: name={}, socket_id={:?}, max_rule_num={}, rule_size={}",
+            name.to_str().unwrap_or("<invalid>"),
+            socket_id,
+            max_rule_num,
+            rule_size,
+        );
+        Ok(Self {
+            name,
+            socket_id,
+            max_rule_num,
+            rule_size,
+            _phantom: PhantomData,
+        })
+    }
+
+    /// Validate and convert an ACL context name to a [`CString`].
+    #[cold]
+    fn validate_name(name: &str) -> Result<CString, InvalidAclName> {
+        if name.is_empty() {
+            return Err(InvalidAclName::Empty);
+        }
+        if !name.is_ascii() {
+            return Err(InvalidAclName::NotAscii);
+        }
+        if name.len() > MAX_ACL_NAME_LEN {
+            return Err(InvalidAclName::TooLong {
+                len: name.len(),
+                max: MAX_ACL_NAME_LEN,
+            });
+        }
+        CString::new(name).map_err(|_| InvalidAclName::ContainsNullBytes)
+    }
+
+    /// Get the context name as a `&str`.
+    #[must_use]
+    pub fn name(&self) -> &str {
+        // SAFETY: The name is validated as ASCII at construction time and therefore is
+        // also valid UTF-8.  `self.name` is a `CString`, so `to_bytes()` excludes the
+        // trailing NUL.
+        unsafe { core::str::from_utf8_unchecked(self.name.to_bytes()) }
+    }
+
+    /// Get the name as a borrowed [`CStr`], suitable for FFI.
+    #[must_use]
+    pub fn name_cstr(&self) -> &CStr {
+        &self.name
+    }
+
+    /// Get the NUMA socket preference.
+    #[must_use]
+    pub fn socket_id(&self) -> SocketId {
+        self.socket_id
+    }
+
+    /// Get the maximum rule count.
+    #[must_use]
+    pub fn max_rule_num(&self) -> NonZero<u32> {
+        self.max_rule_num
+    }
+
+    /// Get the per-rule byte size.
+    ///
+    /// This was computed from the const generic `N` at construction time and equals
+    /// `core::mem::size_of::<Rule<N>>()`.  Non-zero by type since `N > 0`.
+    #[must_use]
+    pub fn rule_size(&self) -> NonZero<u32> {
+        self.rule_size
+    }
+
+    /// Build the raw DPDK [`rte_acl_param`][dpdk_sys::rte_acl_param], borrowed from `self`.
+    ///
+    /// The returned [`RawParams`] holds a `rte_acl_param` whose `name` pointer is
+    /// borrowed from `self.name`.  The lifetime on [`RawParams`] ties the raw
+    /// struct to `&self`, preventing use-after-free if `self` is dropped before
+    /// the FFI call completes.
+    pub(crate) fn to_raw(&self) -> RawParams<'_> {
+        // Cast rationale for `socket_id`:
+        //
+        // [`SocketId`] wraps a `c_uint`, but DPDK's
+        // [`rte_acl_param`][dpdk_sys::rte_acl_param] field is `c_int`.
+        // The cast is exact for the two value classes that ever appear in
+        // a valid `SocketId`:
+        //
+        // - [`SocketId::ANY`][crate::socket::SocketId::ANY] is defined as
+        //   `c_uint::MAX`, which two's-complement-casts to `-1` --
+        //   precisely DPDK's `SOCKET_ID_ANY` sentinel.
+        // - Real NUMA socket IDs are small non-negative integers
+        //   (`< RTE_MAX_NUMA_NODES`, currently 32), safely representable
+        //   in `c_int`.
+        //
+        // No value class produces silent wraparound here, so the `as`
+        // cast is sound without a runtime check.
+        RawParams {
+            raw: dpdk_sys::rte_acl_param {
+                name: self.name.as_ptr(),
+                socket_id: self.socket_id.as_c_uint() as core::ffi::c_int,
+                rule_size: self.rule_size.get(),
+                max_rule_num: self.max_rule_num.get(),
+            },
+            _borrow: PhantomData,
+        }
+    }
+}
+
+/// FFI view of an [`AclCreateParams`]: a [`rte_acl_param`][dpdk_sys::rte_acl_param]
+/// whose `name` pointer is borrowed rather than owned.
+///
+/// The `'a` lifetime pins this value to the [`AclCreateParams`] that owns the
+/// C string, so the raw struct cannot outlive the name it points at.  Call
+/// [`as_ptr`] to obtain the pointer handed to DPDK.
+///
+/// [`as_ptr`]: RawParams::as_ptr
+pub(crate) struct RawParams<'a> {
+    raw: dpdk_sys::rte_acl_param,
+    _borrow: PhantomData<&'a CStr>,
+}
+
+impl RawParams<'_> {
+    /// Borrow the wrapped [`rte_acl_param`][dpdk_sys::rte_acl_param] as a raw const pointer.
+    ///
+    /// The pointer targets a field of `self`, so it is only valid while `self` is alive.
+    #[inline]
+    pub(crate) fn as_ptr(&self) -> *const dpdk_sys::rte_acl_param {
+        core::ptr::from_ref(&self.raw)
+    }
+}
+
+impl<const N: usize> Display for AclCreateParams<N> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Single-line struct-like rendering; the name is quoted, the socket uses `Debug`.
+        f.write_fmt(format_args!(
+            "AclCreateParams<{N}> {{ name: \"{}\", socket_id: {:?}, max_rule_num: {}, rule_size: {} }}",
+            self.name(),
+            self.socket_id,
+            self.max_rule_num,
+            self.rule_size,
+        ))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// AclBuildConfig
+// ---------------------------------------------------------------------------
+
+/// Maximum number of categories that can be used in an ACL context.
+///
+/// Corresponds to [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES].
+pub const MAX_CATEGORIES: u32 = dpdk_sys::RTE_ACL_MAX_CATEGORIES;
+
+/// The required alignment factor for the number of categories.
+///
+/// The `num_categories` value must be either `1` or a multiple of this value.
+///
+/// Corresponds to [`RTE_ACL_RESULTS_MULTIPLIER`][dpdk_sys::RTE_ACL_RESULTS_MULTIPLIER].
+pub const RESULTS_MULTIPLIER: u32 = dpdk_sys::RTE_ACL_RESULTS_MULTIPLIER;
+
+/// Maximum number of fields per ACL rule, widened to `usize` to bound the const generic `N`.
+///
+/// Corresponds to [`RTE_ACL_MAX_FIELDS`][dpdk_sys::RTE_ACL_MAX_FIELDS].
+pub const MAX_FIELDS: usize = dpdk_sys::RTE_ACL_MAX_FIELDS as usize;
+
+/// Validated build configuration for compiling ACL rules into runtime lookup structures.
+///
+/// This is the safe Rust equivalent of [`rte_acl_config`][dpdk_sys::rte_acl_config].
+///
+/// The const generic `N` must match the `N` used in the [`AclContext`][super::context::AclContext]
+/// and in the [`Rule`]`<N>` type.  This is enforced by the type system -- the
+/// [`build`][super::context::AclContext::build] method requires an `AclBuildConfig` with the same
+/// `N` as the context.
+///
+/// # Validation
+///
+/// The constructor validates, among the checks enumerated by [`InvalidAclBuildConfig`]:
+/// - `N <= 64` ([`RTE_ACL_MAX_FIELDS`][dpdk_sys::RTE_ACL_MAX_FIELDS])
+/// - `num_categories` is between 1 and [`MAX_CATEGORIES`] (inclusive)
+/// - `num_categories` is 1 or a multiple of [`RESULTS_MULTIPLIER`]
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AclBuildConfig<const N: usize> {
+    /// Number of categories to build with.
+    ///
+    /// Must be in `1..=`[`MAX_CATEGORIES`] and either `1` or a multiple of
+    /// [`RESULTS_MULTIPLIER`].
+    num_categories: u32,
+
+    /// Field definitions -- one per field in the rule.
+    ///
+    /// The order and semantics of these definitions must match the order of
+    /// [`AclField`][super::rule::AclField] entries in the [`Rule`]`<N>` instances added to the
+    /// context.
+    field_defs: [FieldDef; N],
+
+    /// Maximum memory size (in bytes) for the compiled runtime structures.
+    ///
+    /// Set to `0` to impose no limit.
+    max_size: usize,
+
+    /// Cached output of [`min_input_size`][AclBuildConfig::min_input_size].
+    ///
+    /// Computed once at construction; constant for the lifetime of the
+    /// config since `field_defs` cannot be mutated after `new` returns.
+    /// Avoids O(N^2) re-computation on every classify-time pre-flight.
+    min_input_size: usize,
+}
+
+/// Errors that can occur when constructing an [`AclBuildConfig`].
+#[derive(Debug, thiserror::Error, Copy, Clone, PartialEq, Eq)]
+pub enum InvalidAclBuildConfig {
+    /// `N` exceeds [`RTE_ACL_MAX_FIELDS`][dpdk_sys::RTE_ACL_MAX_FIELDS].
+    #[error("Too many fields: {num_fields} exceeds maximum of {max}")]
+    TooManyFields {
+        /// The number of fields that was requested.
+        num_fields: usize,
+        /// The maximum allowed.
+        max: usize,
+    },
+    /// One of the [`FieldDef`] entries has `field_index >= N`.  DPDK uses
+    /// `field_index` to look up each definition's value in the rule's field
+    /// array; an out-of-range index would read past `Rule<N>`.
+    #[error(
+        "FieldDef.field_index {field_index} is out of range for N = {n} \
+         (valid range: 0..{n})"
+    )]
+    FieldIndexOutOfRange {
+        /// The offending index.
+        field_index: u8,
+        /// The const-generic field count.
+        n: usize,
+    },
+    /// Two [`FieldDef`] entries share the same `field_index`.  Field indices
+    /// must be unique within the array.
+    #[error("FieldDef.field_index {field_index} appears more than once")]
+    DuplicateFieldIndex {
+        /// The duplicated index.
+        field_index: u8,
+    },
+    /// The first field definition does not match DPDK's requirements for
+    /// the trie's entry byte.
+    ///
+    /// DPDK requires the first field in `field_defs` to be **one byte
+    /// long**; it consumes that byte during trie construction.  The
+    /// wrapper additionally requires `input_index = 0` on the first
+    /// field as a convention -- it labels the entry byte as belonging
+    /// to the first input-index group, which simplifies the
+    /// runtime-load reasoning in `min_input_size`.
+    ///
+    /// The first field's **`offset` is unconstrained**: a non-zero
+    /// offset just means the input buffer has leading bytes before the
+    /// ACL key, and DPDK loads from `field_defs[0].offset` regardless.
+    /// `min_input_size` accounts for non-zero leading offsets via the
+    /// per-group load-endpoint formula.
+    #[error(
+        "the first FieldDef must be size = One and input_index = 0 \
+         (got size = {size:?}, input_index = {input_index})"
+    )]
+    InvalidFirstField {
+        /// The first field's declared size.
+        size: super::field::FieldSize,
+        /// The first field's declared `input_index`.
+        input_index: u8,
+    },
+
+    /// `num_categories` is zero.
+    #[error("Number of categories must be at least 1")]
+    ZeroCategories,
+
+    /// `num_categories` exceeds [`MAX_CATEGORIES`].
+    #[error("Number of categories {num_categories} exceeds maximum of {max}")]
+    TooManyCategories {
+        /// The requested number of categories.
+        num_categories: u32,
+        /// The maximum allowed.
+        max: u32,
+    },
+
+    /// `num_categories` is greater than 1 and not a multiple of [`RESULTS_MULTIPLIER`].
+    #[error("Number of categories {num_categories} must be 1 or a multiple of {multiplier}")]
+    CategoriesNotAligned {
+        /// The requested number of categories.
+        num_categories: u32,
+        /// The required alignment factor.
+        multiplier: u32,
+    },
+
+    /// A field whose `(offset, size)` extends past its `input_index`
+    /// group's 4-byte window.  DPDK's classify loop loads exactly 4
+    /// contiguous bytes per `input_index` group starting at the group's
+    /// lowest offset; any field spilling past that window would make
+    /// DPDK read bytes the caller never accounted for, undermining the
+    /// `min_input_size` safety contract.
+    #[error(
+        "FieldDef with input_index {input_index} spans beyond a 4-byte \
+         window: lowest offset is {group_offset}, but field_index \
+         {field_index} extends to offset {extent_end} (max allowed: \
+         {window_end})"
+    )]
+    InvalidInputIndexGrouping {
+        /// The offending `input_index`.
+        input_index: u8,
+        /// The lowest `offset` of any field in the group.
+        group_offset: u32,
+        /// The `field_index` of the field whose extent overruns the window.
+        field_index: u8,
+        /// `offset + size` of the offending field.
+        extent_end: u32,
+        /// `group_offset + 4`.
+        window_end: u32,
+    },
+
+    /// `input_index` 0 contains more than just the first field.
+    ///
+    /// The wrapper reserves `input_index = 0` for the single 1-byte first
+    /// field (the trie entry byte); no other field may share that group.
+    #[error(
+        "input_index 0 must contain only the first FieldDef, but \
+         field_index {extra_field_index} also has input_index 0"
+    )]
+    ExtraFieldInFirstGroup {
+        /// The `field_index` of the second field sharing `input_index = 0`.
+        extra_field_index: u8,
+    },
+
+    /// A non-first `input_index` group does not cover exactly 4 contiguous
+    /// bytes.  DPDK's runtime loads 4 bytes per group; a gap in the field
+    /// coverage would leave loaded bytes unattributed to any field.
+    /// Overlaps are reported separately as `OverlappingFieldsInGroup`.
+    #[error(
+        "input_index {input_index} group does not cover exactly 4 \
+         contiguous bytes starting at offset {group_offset} \
+         (coverage bitmask within the window: {coverage_mask:#06b}, \
+         expected 0b1111)"
+    )]
+    InputIndexGroupCoverage {
+        /// The offending `input_index`.
+        input_index: u8,
+        /// The lowest `offset` of any field in the group.
+        group_offset: u32,
+        /// 4-bit mask of which bytes in `[group_offset, group_offset+4)`
+        /// are covered by some field in the group.  `0b1111` is the
+        /// expected value.
+        coverage_mask: u8,
+    },
+
+    /// Two fields in the same `input_index` group overlap in the bytes
+    /// they cover.  DPDK requires each byte in a group to be claimed by
+    /// at most one field.
+    #[error(
+        "input_index {input_index} group: field_index {field_index} \
+         overlaps another field in the same group (overlap mask: \
+         {overlap_mask:#06b})"
+    )]
+    OverlappingFieldsInGroup {
+        /// The offending `input_index`.
+        input_index: u8,
+        /// The `field_index` of the field that introduced the overlap.
+        field_index: u8,
+        /// 4-bit mask of the overlapping bytes within the group window.
+        overlap_mask: u8,
+    },
+
+    /// A field's `offset + size` or `offset + 4` (an upper bound on its
+    /// group's load endpoint `group_offset + 4`) overflows `u32`.
+    /// `min_input_size()` would have to report at least that endpoint,
+    /// but a `u32` cannot represent it, which would let a caller satisfy
+    /// the documented buffer-size precondition while DPDK still reads
+    /// past the end.  The message reports the field's own `offset` and
+    /// size in either case.  Rejected at construction time.
+    #[error(
+        "field_index {field_index} extent overflows u32: \
+         offset={offset}, size={size_bytes}, would extend past u32::MAX"
+    )]
+    FieldExtentOverflow {
+        /// The offending `field_index`.
+        field_index: u8,
+        /// The field's offset.
+        offset: u32,
+        /// The field's size in bytes.
+        size_bytes: u8,
+    },
+
+    /// Fields sharing an `input_index` are not contiguous in the array.
+    ///
+    /// DPDK's `acl_build_index` records each group's data-index entry at
+    /// the **first occurrence** of the input_index in definition order.
+    /// If fields with the same `input_index` are interleaved with other
+    /// groups, the wrapper's `min_input_size` calculation (which assumes
+    /// the first occurrence is also the group's load offset) can diverge
+    /// from DPDK's actual load position, undermining the safety contract.
+    /// We require all fields sharing an `input_index` to be consecutive
+    /// in the `field_defs` array.
+    #[error(
+        "input_index {input_index} fields are not contiguous in the \
+         field_defs array: field at array position {position} has \
+         input_index {input_index} but a different input_index appeared \
+         between this field and an earlier sibling"
+    )]
+    NonContiguousInputIndexGroup {
+        /// The offending `input_index`.
+        input_index: u8,
+        /// The array position of the field that resumed the group.
+        position: usize,
+    },
+
+    /// Within a contiguous `input_index` group, the fields are not in
+    /// strictly-ascending offset order.
+    ///
+    /// DPDK's `acl_build_index` uses the offset of the **first** field
+    /// in each group (in definition order) as the group's load address.
+    /// Requiring offset-ascending order within each group makes that
+    /// first occurrence also the lowest offset, so the wrapper's
+    /// `min_input_size` (computed from `min(offset) per group`) matches
+    /// DPDK's actual load position.
+    #[error(
+        "input_index {input_index} group: field at array position \
+         {position} has offset {offset}, which is not strictly greater \
+         than the previous field's offset {previous_offset}"
+    )]
+    GroupFieldsNotOffsetOrdered {
+        /// The offending `input_index`.
+        input_index: u8,
+        /// The array position of the out-of-order field.
+        position: usize,
+        /// The offending field's offset.
+        offset: u32,
+        /// The previous (in-group) field's offset.
+        previous_offset: u32,
+    },
+}
+
+impl<const N: usize> AclBuildConfig<N> {
+    /// Compile-time guard: `N == 0` is rejected at monomorphization so
+    /// `AclBuildConfig::<0>::new` fails to compile.  Mirrors the symmetric
+    /// guards on [`Rule<N>`][super::rule::Rule] and [`AclCreateParams<N>`].
+    const _CHECK_N_NONZERO: () = assert!(N > 0, "AclBuildConfig requires N > 0");
+
+    /// Compile-time guard: `N` must not exceed
+    /// [`MAX_FIELDS`][super::config::MAX_FIELDS].
+    ///
+    /// Mirrors the same guard on [`AclCreateParams<N>`] so that an
+    /// out-of-range `N` is rejected uniformly across the two configuration
+    /// types -- without this, `AclBuildConfig<65>` would compile and only
+    /// fall over at runtime in `AclBuildConfig::new`'s `TooManyFields`
+    /// branch.  Forced to evaluate in `new` via a let-binding.
+    const _CHECK_N_FITS_MAX_FIELDS: () = assert!(
+        N <= MAX_FIELDS,
+        "AclBuildConfig requires N <= RTE_ACL_MAX_FIELDS (64)"
+    );
+
+    /// Create a validated build configuration.
+    ///
+    /// # Arguments
+    ///
+    /// * `num_categories` -- the number of result categories.  Must be in
+    ///   `1..=`[`MAX_CATEGORIES`] and either `1` or a multiple of [`RESULTS_MULTIPLIER`].
+    /// * `field_defs` -- the field definitions for the rule layout (one per field).
+    /// * `max_size` -- maximum memory (in bytes) for compiled structures, or `0` for no limit.
+    ///
+    /// # Validation scope
+    ///
+    /// This constructor checks:
+    ///
+    /// - **First field shape**: size = 1, `input_index` = 0 (DPDK's
+    ///   trie-entry-byte contract).  `offset` is unconstrained -- the
+    ///   first field may sit at any byte position in the input buffer,
+    ///   and [`min_input_size`][AclBuildConfig::min_input_size] accounts
+    ///   for leading bytes via the per-group load-endpoint formula.
+    ///   See [`InvalidFirstField`][InvalidAclBuildConfig::InvalidFirstField]
+    ///   for the precise contract.
+    /// - **`field_index` invariants**: every `field_index` is `< N` and unique.
+    /// - **`input_index = 0` group**: contains only the first field.
+    /// - **Definition order**: fields sharing an `input_index` are
+    ///   consecutive in `field_defs`, with strictly ascending offsets.
+    /// - **Extent overflow**: no field's `offset + size` or `offset + 4`
+    ///   overflows `u32`.
+    /// - **Non-first `input_index` groups**: the union of fields sharing
+    ///   the group's `input_index` covers **exactly 4 contiguous bytes**
+    ///   with no overlaps -- matches DPDK's runtime 4-byte-per-group
+    ///   load pattern.  This is the load-bearing safety check for the
+    ///   [`min_input_size`][AclBuildConfig::min_input_size] contract.
+    /// - **Categories**: `num_categories` is in `1..=MAX_CATEGORIES` and
+    ///   either `1` or a multiple of `RESULTS_MULTIPLIER`.
+    ///
+    /// An `Ok` from this constructor does **not** imply a successful build
+    /// at DPDK time -- DPDK may still reject the config for reasons we do
+    /// not pre-check (e.g. excessive trie size with `max_size > 0`).  But
+    /// every reason the wrapper accepts a config corresponds to a layout
+    /// whose `classify`-time loads stay within
+    /// [`min_input_size`][AclBuildConfig::min_input_size] bytes.
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`InvalidAclBuildConfig`] variant for the first check that fails.
+    #[cold]
+    #[tracing::instrument(level = "debug")]
+    pub fn new(
+        num_categories: u32,
+        field_defs: [FieldDef; N],
+        max_size: usize,
+    ) -> Result<Self, InvalidAclBuildConfig> {
+        // Force evaluation of both const assertions for this monomorphisation.
+        // `_CHECK_N_FITS_MAX_FIELDS` makes `N > MAX_FIELDS` a compile error,
+        // so the runtime branch below is unreachable for any properly
+        // monomorphised call; we keep the runtime check as a defence-in-depth
+        // (and to surface a typed `TooManyFields` error rather than a panic
+        // for cases where the const-assert is bypassed).
+        let () = Self::_CHECK_N_NONZERO;
+        let () = Self::_CHECK_N_FITS_MAX_FIELDS;
+
+        if N > MAX_FIELDS {
+            return Err(InvalidAclBuildConfig::TooManyFields {
+                num_fields: N,
+                max: MAX_FIELDS,
+            });
+        }
+        if num_categories == 0 {
+            return Err(InvalidAclBuildConfig::ZeroCategories);
+        }
+        if num_categories > MAX_CATEGORIES {
+            return Err(InvalidAclBuildConfig::TooManyCategories {
+                num_categories,
+                max: MAX_CATEGORIES,
+            });
+        }
+        if num_categories > 1 && !num_categories.is_multiple_of(RESULTS_MULTIPLIER) {
+            return Err(InvalidAclBuildConfig::CategoriesNotAligned {
+                num_categories,
+                multiplier: RESULTS_MULTIPLIER,
+            });
+        }
+
+        // First field: DPDK requires size = 1 (the trie's entry byte),
+        // and the wrapper additionally requires input_index = 0 so that
+        // the entry byte sits in its own input-index group (see the
+        // grouping validator below).  `offset` is unconstrained -- it
+        // simply describes where in the input buffer the entry byte
+        // lives; `min_input_size` accounts for any leading bytes.
+        // N > 0 has been checked above, so field_defs[0] is safe to index.
+        let first = &field_defs[0];
+        if !matches!(first.size(), super::field::FieldSize::One) || first.input_index() != 0 {
+            return Err(InvalidAclBuildConfig::InvalidFirstField {
+                size: first.size(),
+                input_index: first.input_index(),
+            });
+        }
+
+        // Every field_index must be < N (DPDK uses it to index the rule's
+        // field array, so out-of-range reads past Rule<N>) and unique.
+        // O(N^2) duplicate check is fine: N <= RTE_ACL_MAX_FIELDS = 64.
+        for (i, def) in field_defs.iter().enumerate() {
+            let fi = def.field_index();
+            if (fi as usize) >= N {
+                return Err(InvalidAclBuildConfig::FieldIndexOutOfRange {
+                    field_index: fi,
+                    n: N,
+                });
+            }
+            for later in &field_defs[i + 1..] {
+                if later.field_index() == fi {
+                    return Err(InvalidAclBuildConfig::DuplicateFieldIndex { field_index: fi });
+                }
+            }
+        }
+
+        // No other field may share input_index = 0; that group is reserved
+        // for the 1-byte first field.
+        for def in &field_defs[1..] {
+            if def.input_index() == 0 {
+                return Err(InvalidAclBuildConfig::ExtraFieldInFirstGroup {
+                    extra_field_index: def.field_index(),
+                });
+            }
+        }
+
+        // Validate definition-order shape:
+        //
+        // 1. Fields with the same `input_index` must appear consecutively
+        //    in `field_defs` (no interleaving with other groups).  DPDK's
+        //    `acl_build_index` walks defs in array order and records a
+        //    new data-index slot whenever input_index changes; an
+        //    interleaving caller would create two separate data-index
+        //    slots for the same logical group, breaking the
+        //    `min_input_size` calculation.
+        //
+        // 2. Within each contiguous run, offsets must be strictly
+        //    ascending.  DPDK uses the first field's offset (in array
+        //    order) as the group's load address; requiring
+        //    offset-ascending order makes that first field also the
+        //    lowest-offset field, so our `min_input_size` (computed from
+        //    `min(offset)` per group) matches DPDK's actual load.
+        //
+        // We track each input_index's "already closed" status in a
+        // 256-entry bool table: once a different input_index is observed
+        // after we've started one, that one's entry is set and a later
+        // re-occurrence is an error.  Indexed by `input_index`, which
+        // fits in u8 (i.e. 0..=255).
+        let mut closed = [false; 256];
+        let mut current_input_index: Option<(u8, u32)> = None; // (input_index, last_offset_seen)
+        for (pos, def) in field_defs.iter().enumerate() {
+            let ii = def.input_index();
+            let offset = def.offset();
+            match current_input_index {
+                Some((open_ii, last_offset)) if open_ii == ii => {
+                    // Still inside the same group; verify offset > last_offset.
+                    if offset <= last_offset {
+                        return Err(InvalidAclBuildConfig::GroupFieldsNotOffsetOrdered {
+                            input_index: ii,
+                            position: pos,
+                            offset,
+                            previous_offset: last_offset,
+                        });
+                    }
+                    current_input_index = Some((ii, offset));
+                }
+                Some((open_ii, _)) => {
+                    // Group `open_ii` is now closed; start `ii` if it
+                    // hasn't already been closed.
+                    closed[open_ii as usize] = true;
+                    if closed[ii as usize] {
+                        return Err(InvalidAclBuildConfig::NonContiguousInputIndexGroup {
+                            input_index: ii,
+                            position: pos,
+                        });
+                    }
+                    current_input_index = Some((ii, offset));
+                }
+                None => {
+                    current_input_index = Some((ii, offset));
+                }
+            }
+        }
+
+        // Reject any field whose extent (`offset + size`) or whose
+        // group-load endpoint (`offset + 4`) would overflow `u32`.
+        // `min_input_size` reports a `usize` derived from these
+        // endpoints; if the u32 arithmetic saturates, the reported
+        // bound understates DPDK's actual read extent and the safety
+        // contract is broken.
+        for def in &field_defs {
+            let size_bytes = def.size() as u8 as u32;
+            if def.offset().checked_add(size_bytes).is_none()
+                || def.offset().checked_add(4).is_none()
+            {
+                return Err(InvalidAclBuildConfig::FieldExtentOverflow {
+                    field_index: def.field_index(),
+                    offset: def.offset(),
+                    size_bytes: def.size() as u8,
+                });
+            }
+        }
+
+        // Validate the input_index grouping rule for non-first groups:
+        // every field sharing an input_index > 0 must fit inside a 4-byte
+        // window starting at the group's lowest offset, and the union of
+        // all fields in the group must cover **exactly** those 4 bytes
+        // with no overlap.  DPDK loads 4 contiguous bytes per group at the
+        // group_offset; a sub-4-byte covered region would leave loaded
+        // bytes unattributed to any field (incorrect trie traversal), and
+        // an overlap would build a trie node with inconsistent semantics.
+        //
+        // O(N^2) again; N <= MAX_FIELDS = 64.  Coverage tracked as a 4-bit
+        // mask within the group window (bit i means "byte at group_offset + i").
+        // The overflow check above means `offset + size` and `group_offset
+        // + 4` no longer need saturation; they fit in u32 by construction.
+        for def in &field_defs {
+            let ii = def.input_index();
+            if ii == 0 {
+                continue; // already handled above
+            }
+            // group_offset = min(field.offset for field where input_index == ii)
+            let mut group_offset = def.offset();
+            for other in &field_defs {
+                if other.input_index() == ii && other.offset() < group_offset {
+                    group_offset = other.offset();
+                }
+            }
+            let extent_end = def.offset() + def.size() as u8 as u32;
+            let window_end = group_offset + 4;
+            if extent_end > window_end {
+                return Err(InvalidAclBuildConfig::InvalidInputIndexGrouping {
+                    input_index: ii,
+                    group_offset,
+                    field_index: def.field_index(),
+                    extent_end,
+                    window_end,
+                });
+            }
+        }
+        // Second pass: each non-first input_index group must cover exactly
+        // 4 contiguous bytes via the union of its fields, with no overlap.
+        // Each group is processed exactly once, at the first appearance
+        // of its input_index in `field_defs`.
+        for (anchor_idx, anchor) in field_defs.iter().enumerate() {
+            let ii = anchor.input_index();
+            if ii == 0 {
+                continue;
+            }
+            // Process this input_index only at its first occurrence.
+            if field_defs[..anchor_idx]
+                .iter()
+                .any(|prev| prev.input_index() == ii)
+            {
+                continue;
+            }
+            // Lowest offset in the group; `anchor` is a member, so `min()` is always `Some`.
+            let group_offset = field_defs
+                .iter()
+                .filter(|d| d.input_index() == ii)
+                .map(|d| d.offset())
+                .min()
+                .unwrap_or(anchor.offset());
+            // Accumulate the 4-bit coverage mask; reject overlaps.
+            let mut mask: u8 = 0;
+            for d in field_defs.iter().filter(|d| d.input_index() == ii) {
+                let shift = (d.offset() - group_offset) as u8;
+                let size_bits = d.size() as u8;
+                let field_mask = ((1u8 << size_bits) - 1) << shift;
+                let overlap = mask & field_mask;
+                if overlap != 0 {
+                    return Err(InvalidAclBuildConfig::OverlappingFieldsInGroup {
+                        input_index: ii,
+                        field_index: d.field_index(),
+                        overlap_mask: overlap,
+                    });
+                }
+                mask |= field_mask;
+            }
+            if mask != 0b1111 {
+                return Err(InvalidAclBuildConfig::InputIndexGroupCoverage {
+                    input_index: ii,
+                    group_offset,
+                    coverage_mask: mask,
+                });
+            }
+        }
+
+        // Memoize the safety-critical buffer-size requirement.  All
+        // grouping invariants have been validated above, so this loop is
+        // sound and the result is constant for the lifetime of the
+        // config.
+        let min_input_size = Self::compute_min_input_size(&field_defs);
+
+        debug!(
+            "Created ACL build config: num_categories={num_categories}, num_fields={N}, max_size={max_size}, min_input_size={min_input_size}",
+        );
+
+        Ok(Self {
+            num_categories,
+            field_defs,
+            max_size,
+            min_input_size,
+        })
+    }
+
+    /// Derive the input-buffer size that `classify` callers must provide.
+    ///
+    /// For every field, finds the lowest offset among the fields sharing
+    /// its `input_index` and adds 4 to get that group's load endpoint; the
+    /// result is the largest endpoint seen (0 would only arise for an
+    /// empty array).  Called once from [`new`][AclBuildConfig::new], which
+    /// caches the value for
+    /// [`min_input_size`][AclBuildConfig::min_input_size].
+    ///
+    /// Precondition: every field's `offset + 4` fits in `u32`, which the
+    /// `FieldExtentOverflow` check in `new` guarantees.
+    fn compute_min_input_size(field_defs: &[FieldDef; N]) -> usize {
+        let furthest_load_end: u32 = field_defs
+            .iter()
+            .map(|anchor| {
+                // Lowest offset among the fields in `anchor`'s group.
+                // Seeding the fold with `anchor.offset()` is harmless:
+                // the anchor belongs to its own group.
+                let group_offset = field_defs
+                    .iter()
+                    .filter(|peer| peer.input_index() == anchor.input_index())
+                    .map(|peer| peer.offset())
+                    .fold(anchor.offset(), u32::min);
+                // Plain `+` is safe: `group_offset <= anchor.offset()`
+                // and `anchor.offset() + 4` was checked against overflow
+                // by `new`.
+                group_offset + 4
+            })
+            .fold(0, u32::max);
+        furthest_load_end as usize
+    }
+
+    /// Number of result categories accepted by [`new`][AclBuildConfig::new].
+    #[must_use]
+    pub fn num_categories(&self) -> u32 {
+        self.num_categories
+    }
+
+    /// The validated field definitions, in the order given to [`new`][AclBuildConfig::new].
+    #[must_use]
+    pub fn field_defs(&self) -> &[FieldDef; N] {
+        &self.field_defs
+    }
+
+    /// Memory cap (in bytes) for the compiled runtime structures; `0` means no limit.
+    #[must_use]
+    pub fn max_size(&self) -> usize {
+        self.max_size
+    }
+
+    /// The minimum size, in bytes, that an input buffer passed to
+    /// [`classify`][super::context::AclContext::classify] must be valid for.
+    ///
+    /// DPDK's classify loop does **not** read one field at a time at the
+    /// field's `offset`; it performs 4-byte loads where each load's
+    /// starting offset is the lowest `FieldDef.offset` within an
+    /// `input_index` group.  For every distinct `input_index` value the
+    /// buffer must therefore be valid for reads in
+    /// `[group_offset, group_offset + 4)`.  This function returns the
+    /// maximum `group_offset + 4` across all `input_index` groups.
+    ///
+    /// The grouping invariant validated by [`new`][AclBuildConfig::new]
+    /// (every field's `offset + size` fits within its group's 4-byte
+    /// window) guarantees that this value is also at least
+    /// `max(field.offset + field.size)`.
+    ///
+    /// Callers of the unsafe [`classify`][super::context::AclContext::classify]
+    /// API should size their input buffers to at least this value to avoid
+    /// out-of-bounds reads.
+    ///
+    /// Computed and cached at [`new`][AclBuildConfig::new] time;
+    /// returning the cached value is O(1).
+    #[must_use]
+    pub fn min_input_size(&self) -> usize {
+        self.min_input_size
+    }
+
+    /// Build the raw DPDK [`rte_acl_config`][dpdk_sys::rte_acl_config].
+    ///
+    /// The result is an owned value that borrows nothing from `self`.
+    ///
+    /// # Stack footprint
+    ///
+    /// `rte_acl_config::defs` is a fixed-size C array with
+    /// `RTE_ACL_MAX_FIELDS` (= [`MAX_FIELDS`] = 64) slots -- roughly
+    /// 0.5 KiB of stack at 8 bytes per `rte_acl_field_def`.  That is
+    /// acceptable because building is a cold path.  Only the first `N`
+    /// slots are filled from `field_defs`; the rest keep their default
+    /// value, since DPDK's `rte_acl_build` reads `defs[0..num_fields]`
+    /// and ignores anything beyond.
+    pub(crate) fn to_raw(&self) -> dpdk_sys::rte_acl_config {
+        let mut raw_defs = [dpdk_sys::rte_acl_field_def::default(); MAX_FIELDS];
+        for (slot, def) in raw_defs[..N].iter_mut().zip(&self.field_defs) {
+            *slot = dpdk_sys::rte_acl_field_def::from(def);
+        }
+        dpdk_sys::rte_acl_config {
+            num_categories: self.num_categories,
+            num_fields: N as u32,
+            defs: raw_defs,
+            max_size: self.max_size,
+        }
+    }
+}
+
+impl<const N: usize> Display for AclBuildConfig<N> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let (categories, size_cap) = (self.num_categories, self.max_size);
+        f.write_fmt(format_args!(
+            "AclBuildConfig<{N}> {{ num_categories: {}, max_size: {} }}",
+            categories, size_cap,
+        ))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::acl::field::{FieldSize, FieldType};
+
+    /// Test helper: wrap a known-non-zero literal in `NonZero<u32>`, panicking on zero.
+    fn nz(value: u32) -> NonZero<u32> {
+        NonZero::new(value).unwrap_or_else(|| panic!("test literal is non-zero"))
+    }
+
+    // -- AclCreateParams name validation --
+
+    #[test]
+    fn valid_name_accepted() {
+        let wanted = "my_acl_ctx";
+        let outcome = AclCreateParams::<5>::new(wanted, SocketId::ANY, nz(1024));
+        assert!(outcome.is_ok());
+        assert_eq!(outcome.unwrap().name(), wanted);
+    }
+
+    #[test]
+    fn empty_name_rejected() {
+        let err = AclCreateParams::<1>::new("", SocketId::ANY, nz(128)).err();
+        assert!(matches!(err, Some(InvalidAclName::Empty)));
+    }
+
+    #[test]
+    fn non_ascii_name_rejected() {
+        // Non-ASCII name U+65E5 U+672C U+8A9E, spelled via escapes so source stays ASCII-only.
+        let name = "\u{65E5}\u{672C}\u{8A9E}";
+        let err = AclCreateParams::<1>::new(name, SocketId::ANY, nz(128)).err();
+        assert!(matches!(err, Some(InvalidAclName::NotAscii)));
+    }
+
+    #[test]
+    fn too_long_name_rejected() {
+        // One character past the limit (MAX_ACL_NAME_LEN is RTE_ACL_NAMESIZE - 1 = 31).
+        let oversized = "a".repeat(MAX_ACL_NAME_LEN + 1);
+        let err = AclCreateParams::<1>::new(&oversized, SocketId::ANY, nz(128)).err();
+        assert!(matches!(err, Some(InvalidAclName::TooLong { .. })));
+    }
+
+    #[test]
+    fn max_length_name_accepted() {
+        let longest_allowed = "a".repeat(MAX_ACL_NAME_LEN);
+        let outcome = AclCreateParams::<1>::new(&longest_allowed, SocketId::ANY, nz(128));
+        assert!(outcome.is_ok());
+    }
+
+    #[test]
+    fn name_with_null_byte_rejected() {
+        let err = AclCreateParams::<1>::new("hello\0world", SocketId::ANY, nz(128)).err();
+        assert!(matches!(err, Some(InvalidAclName::ContainsNullBytes)));
+    }
+
+    #[test]
+    fn rule_size_matches_generic() {
+        let params = AclCreateParams::<5>::new("test", SocketId::ANY, nz(128)).unwrap();
+        // `rule_size` must report the in-memory size of the matching `Rule<5>`.
+        let reported = params.rule_size().get() as usize;
+        let expected = core::mem::size_of::<Rule<5>>();
+        assert_eq!(reported, expected);
+    }
+
+    #[test]
+    fn to_raw_preserves_values() {
+        let params = AclCreateParams::<3>::new("raw_test", SocketId::ANY, nz(256)).unwrap();
+        let raw_params = params.to_raw();
+        // SAFETY: raw_params borrows from `params`, which is alive in this scope.
+        let raw = unsafe { *raw_params.as_ptr() };
+        // SAFETY: `raw.name` should point at the C string owned by `params`, which is still alive.
+        let raw_name = unsafe { CStr::from_ptr(raw.name) };
+        assert_eq!(raw_name.to_str().unwrap(), "raw_test");
+        assert_eq!(raw.max_rule_num, 256);
+        assert_eq!(raw.rule_size as usize, core::mem::size_of::<Rule<3>>());
+    }
+
+    #[test]
+    fn display_contains_name() {
+        let created = AclCreateParams::<1>::new("display_test", SocketId::ANY, nz(10)).unwrap();
+        let s = created.to_string();
+        assert!(s.contains("display_test"), "got: {s}");
+    }
+
+    // -- AclBuildConfig validation --
+
+    /// Produce `N` field definitions that satisfy the first-field rule.
+    ///
+    /// Slot 0 is a one-byte bitmask entry field; every later slot `k` is
+    /// a 4-byte mask field with `field_index = input_index = k`.
+    fn sample_field_defs<const N: usize>() -> [FieldDef; N] {
+        core::array::from_fn(|slot| match slot {
+            // The 1-byte entry field, alone in input_index group 0.
+            0 => FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            // Slot `k > 0` gets its own group `k` at byte offset `4k`.
+            k => {
+                let index = k as u8;
+                // Same arithmetic as before: widen after multiplying.
+                let offset = (k * 4) as u32;
+                FieldDef::new(FieldType::Mask, FieldSize::Four, index, index, offset)
+            }
+        })
+    }
+
+    #[test]
+    fn valid_build_config_single_category() {
+        // Unwrap directly (no separate `is_ok` assert) so that a rejection
+        // panics with the validation error itself.
+        let cfg = AclBuildConfig::new(1, sample_field_defs::<5>(), 0).unwrap();
+        assert_eq!(cfg.num_categories(), 1);
+        assert_eq!(cfg.max_size(), 0);
+        assert_eq!(cfg.field_defs().len(), 5);
+    }
+
+    #[test]
+    fn valid_build_config_multiple_categories() {
+        // Unwrap directly so that a rejection reports the validation error itself.
+        let cfg = AclBuildConfig::new(4, sample_field_defs::<3>(), 1024).unwrap();
+        assert_eq!(cfg.num_categories(), 4);
+    }
+
+    #[test]
+    fn zero_categories_rejected() {
+        let outcome = AclBuildConfig::new(0, sample_field_defs::<1>(), 0);
+        assert!(matches!(outcome, Err(InvalidAclBuildConfig::ZeroCategories)));
+    }
+
+    #[test]
+    fn too_many_categories_rejected() {
+        let outcome = AclBuildConfig::new(MAX_CATEGORIES + 1, sample_field_defs::<1>(), 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::TooManyCategories { .. })
+        ));
+    }
+
+    #[test]
+    fn max_categories_accepted() {
+        let outcome = AclBuildConfig::new(MAX_CATEGORIES, sample_field_defs::<1>(), 0);
+        assert!(outcome.is_ok());
+    }
+
+    #[test]
+    fn misaligned_categories_rejected() {
+        // Above 1 the count must be a multiple of RESULTS_MULTIPLIER (4); 3 is not.
+        let outcome = AclBuildConfig::new(3, sample_field_defs::<1>(), 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::CategoriesNotAligned { .. })
+        ));
+    }
+
+    #[test]
+    fn to_raw_build_config_preserves_fields() {
+        // Fields 1 and 2 (2-byte Range, input_index 1) tile bytes [4, 8) exactly.
+        let defs: [FieldDef; 3] = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Range, FieldSize::Two, 1, 1, 4),
+            FieldDef::new(FieldType::Range, FieldSize::Two, 2, 1, 6),
+        ];
+        let cfg = AclBuildConfig::new(1, defs, 4096).unwrap();
+        let raw = cfg.to_raw();
+        assert_eq!(raw.num_categories, 1);
+        assert_eq!(raw.num_fields, 3);
+        assert_eq!(raw.max_size, 4096);
+        let expected = [
+            (FieldType::Bitmask, FieldSize::One, 0),
+            (FieldType::Range, FieldSize::Two, 4),
+            (FieldType::Range, FieldSize::Two, 6),
+        ];
+        for (i, (field_type, size, offset)) in expected.into_iter().enumerate() {
+            assert_eq!(raw.defs[i].type_, field_type as u8, "defs[{i}].type_");
+            assert_eq!(raw.defs[i].size, size as u8, "defs[{i}].size");
+            assert_eq!(raw.defs[i].offset, offset, "defs[{i}].offset");
+        }
+    }
+
+    #[test]
+    fn build_config_display() {
+        let config = AclBuildConfig::new(4, sample_field_defs::<3>(), 0).unwrap();
+        let rendered = format!("{config}");
+        assert!(rendered.contains("AclBuildConfig<3>"), "got: {rendered}");
+        assert!(rendered.contains("num_categories: 4"), "got: {rendered}");
+    }
+
+    // Note: there is no runtime `zero_fields_rejected` test.  N == 0 is
+    // rejected at compile time by the `_CHECK_N_NONZERO` const assertion on
+    // `AclBuildConfig<N>`, so `AclBuildConfig::<0>::new(1, [], 0)` would
+    // fail to monomorphize.
+
+    #[test]
+    fn first_field_invalid_rejected() {
+        // The leading definition is four bytes wide; only one byte is allowed.
+        let defs = [
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::InvalidFirstField {
+                size: FieldSize::Four,
+                input_index: 0,
+            })
+        ));
+    }
+
+    #[test]
+    fn field_index_out_of_range_rejected() {
+        // With N = 2, the second def's field_index of 5 is out of range:
+        // DPDK would index past Rule<2> when looking up the field value.
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 5, 1, 4),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::FieldIndexOutOfRange {
+                field_index: 5,
+                n: 2
+            })
+        ));
+    }
+
+    #[test]
+    fn duplicate_field_index_rejected() {
+        // field_index 0 is claimed by both definitions.
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 0, 1, 4),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::DuplicateFieldIndex { field_index: 0 })
+        ));
+    }
+
+    #[test]
+    fn invalid_input_index_grouping_rejected() {
+        // Fields 1 and 2 both claim input_index 1, whose 4-byte window starts
+        // at the group offset 4 and therefore spans [4, 8).  Field 2 sits at
+        // offset 12 with size 4, so it ends at 16 -- outside that window --
+        // and the grouping must be rejected.
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 2, 1, 12),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::InvalidInputIndexGrouping {
+                input_index: 1,
+                group_offset: 4,
+                field_index: 2,
+                extent_end: 16,
+                window_end: 8,
+            })
+        ));
+    }
+
+    #[test]
+    fn extra_field_in_first_group_rejected() {
+        // input_index 0 may hold field_defs[0] only; here a second def uses it.
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 0, 4),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::ExtraFieldInFirstGroup {
+                extra_field_index: 1
+            })
+        ));
+    }
+
+    #[test]
+    fn undersized_group_rejected() {
+        // The input_index 1 group holds only a 1-byte field, but a group must
+        // cover all 4 bytes; its coverage mask is just 0b0001 (byte 0).
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 1, 1, 4),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::InputIndexGroupCoverage {
+                input_index: 1,
+                group_offset: 4,
+                coverage_mask: 0b0001,
+            })
+        ));
+    }
+
+    #[test]
+    fn overlapping_group_fields_rejected() {
+        // Within input_index 1, a 4-byte field at offset 4 is followed by a
+        // 2-byte field at offset 6.  The offsets ascend strictly (so the
+        // ordering check passes), yet both fields occupy bytes [6, 8).
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4),
+            FieldDef::new(FieldType::Mask, FieldSize::Two, 2, 1, 6),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::OverlappingFieldsInGroup { input_index: 1, .. })
+        ));
+    }
+
+    #[test]
+    fn non_contiguous_input_index_group_rejected() {
+        // The input_index 1 group is split: input_index 2 appears in between.
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Two, 1, 1, 4),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 2, 2, 8),
+            FieldDef::new(FieldType::Mask, FieldSize::Two, 3, 1, 6),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::NonContiguousInputIndexGroup {
+                input_index: 1,
+                position: 3,
+            })
+        ));
+    }
+
+    #[test]
+    fn group_fields_not_offset_ordered_rejected() {
+        // Offsets inside a group must ascend strictly; in input_index 1 the
+        // second member (offset 4) comes after the first (offset 6).
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Two, 1, 1, 6),
+            FieldDef::new(FieldType::Mask, FieldSize::Two, 2, 1, 4),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::GroupFieldsNotOffsetOrdered {
+                input_index: 1,
+                position: 2,
+                offset: 4,
+                previous_offset: 6,
+            })
+        ));
+    }
+
+    #[test]
+    fn field_extent_overflow_rejected() {
+        // A 4-byte field starting at u32::MAX - 2 would end at u32::MAX + 2,
+        // which overflows u32.  Construction must refuse it; otherwise
+        // min_input_size's u32 math would saturate and understate DPDK's read end.
+        let start = u32::MAX - 2;
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, start),
+        ];
+        let outcome = AclBuildConfig::new(1, defs, 0);
+        assert!(matches!(
+            outcome,
+            Err(InvalidAclBuildConfig::FieldExtentOverflow {
+                field_index: 1,
+                offset,
+                size_bytes: 4,
+            }) if offset == start
+        ));
+    }
+
+    #[test]
+    fn min_input_size_uses_group_offsets() {
+        // The input_index 9 group is a single 4-byte field covering bytes
+        // [100, 104).  DPDK loads 4 bytes starting at the group offset (100),
+        // so min_input_size has to be 104.  The `input_index * 4 + 4`
+        // formula used by earlier wrapper versions would give 40 and let
+        // DPDK read past the end of an undersized buffer.
+        let defs = [
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 9, 100),
+        ];
+        let config = AclBuildConfig::new(1, defs, 0).expect("config should validate");
+        assert_eq!(
+            config.min_input_size(),
+            104,
+            "DPDK loads 4 bytes from group_offset = 100, so min_input_size = 104"
+        );
+    }
+
+    /// Property: for arbitrary strings, `AclCreateParams::new` succeeds only
+    /// for non-empty ASCII names of at most `MAX_ACL_NAME_LEN` bytes with no
+    /// NUL byte, and each of the four `InvalidAclName` variants is returned
+    /// only for an input that really has the defect the variant names.
+    #[test]
+    fn create_params_name_validation_property() {
+        bolero::check!()
+            .with_type::<String>()
+            .for_each(|candidate: &String| {
+                // Classify the outcome and check it against the input itself.
+                match AclCreateParams::<1>::new(candidate.as_str(), SocketId::ANY, nz(1)) {
+                    Ok(params) => {
+                        // Acceptance implies that every precondition holds.
+                        assert!(!candidate.is_empty());
+                        assert!(candidate.is_ascii());
+                        assert!(candidate.len() <= MAX_ACL_NAME_LEN);
+                        assert!(!candidate.contains('\0'));
+                        assert_eq!(params.name(), candidate.as_str());
+                    }
+                    Err(InvalidAclName::Empty) => assert!(candidate.is_empty()),
+                    Err(InvalidAclName::NotAscii) => assert!(!candidate.is_ascii()),
+                    Err(InvalidAclName::TooLong { len, max }) => {
+                        assert_eq!(len, candidate.len());
+                        assert_eq!(max, MAX_ACL_NAME_LEN);
+                        assert!(candidate.len() > MAX_ACL_NAME_LEN);
+                    }
+                    Err(InvalidAclName::ContainsNullBytes) => {
+                        // This variant is produced only once the Empty /
+                        // NotAscii / TooLong checks have passed, so the name
+                        // is otherwise well-formed and must hold a NUL.
+                        assert!(!candidate.is_empty());
+                        assert!(candidate.is_ascii());
+                        assert!(candidate.len() <= MAX_ACL_NAME_LEN);
+                        assert!(candidate.contains('\0'));
+                    }
+                }
+            });
+    }
+
+    /// Property: `AclBuildConfig::new` accepts `num_categories` iff it is
+    /// non-zero, no greater than `MAX_CATEGORIES`, and either exactly `1`
+    /// or a multiple of `RESULTS_MULTIPLIER`.
+    #[test]
+    fn build_config_num_categories_validation_property() {
+        bolero::check!()
+            .with_type::<u32>()
+            .for_each(|count: &u32| {
+                let requested = *count;
+                let in_range = (1..=MAX_CATEGORIES).contains(&requested);
+                let aligned = requested == 1 || requested.is_multiple_of(RESULTS_MULTIPLIER);
+                // Each outcome must agree with the two predicates computed above.
+                match AclBuildConfig::new(requested, sample_field_defs::<1>(), 0) {
+                    Ok(cfg) => {
+                        assert!(in_range);
+                        assert!(aligned);
+                        assert_eq!(cfg.num_categories(), requested);
+                    }
+                    Err(InvalidAclBuildConfig::ZeroCategories) => {
+                        assert_eq!(requested, 0);
+                    }
+                    Err(InvalidAclBuildConfig::TooManyCategories {
+                        num_categories: reported,
+                        max,
+                    }) => {
+                        assert_eq!(reported, requested);
+                        assert_eq!(max, MAX_CATEGORIES);
+                        assert!(requested > MAX_CATEGORIES);
+                    }
+                    Err(InvalidAclBuildConfig::CategoriesNotAligned { .. }) => {
+                        assert!(in_range);
+                        assert!(!aligned);
+                    }
+                    Err(InvalidAclBuildConfig::TooManyFields { .. }) => {
+                        unreachable!("N=1 cannot trigger TooManyFields")
+                    }
+                    Err(InvalidAclBuildConfig::FieldIndexOutOfRange { .. })
+                    | Err(InvalidAclBuildConfig::DuplicateFieldIndex { .. })
+                    | Err(InvalidAclBuildConfig::InvalidFirstField { .. })
+                    | Err(InvalidAclBuildConfig::ExtraFieldInFirstGroup { .. })
+                    | Err(InvalidAclBuildConfig::InvalidInputIndexGrouping { .. })
+                    | Err(InvalidAclBuildConfig::InputIndexGroupCoverage { .. })
+                    | Err(InvalidAclBuildConfig::OverlappingFieldsInGroup { .. })
+                    | Err(InvalidAclBuildConfig::NonContiguousInputIndexGroup { .. })
+                    | Err(InvalidAclBuildConfig::GroupFieldsNotOffsetOrdered { .. })
+                    | Err(InvalidAclBuildConfig::FieldExtentOverflow { .. }) => {
+                        unreachable!(
+                            "sample_field_defs<1> produces a valid layout; field-array errors \
+                             are not reachable via this test"
+                        )
+                    }
+                }
+            });
+    }
+
+    /// Property: `AclBuildConfig::new` accepts a `[FieldDef; N]` exactly
+    /// when an independent Rust-side oracle reports that every
+    /// wrapper-enforced invariant holds.  Bolero supplies a fuzzed 32-byte
+    /// input, a `[FieldDef; 4]` is derived from it deterministically, and
+    /// the validator's verdict is compared with the oracle's.
+    ///
+    /// The oracle is written from scratch (not copied from the impl), so
+    /// a bug on either side shows up as a disagreement.  The test is
+    /// aimed at mistakes in the ordering / contiguity / coverage /
+    /// overlap logic of [`AclBuildConfig::new`].
+    #[test]
+    fn build_config_field_defs_validation_property() {
+        const N: usize = 4;
+        // 4 definitions at 8 input bytes each = 32 bytes per case.
+        bolero::check!()
+            .with_type::<[u8; 32]>()
+            .for_each(|input: &[u8; 32]| {
+                let defs = field_defs_from_bytes::<N>(input);
+                let verdict = AclBuildConfig::new(1, defs, 0);
+                let oracle_ok = oracle_field_defs_valid::<N>(&defs);
+                match (oracle_ok, verdict.as_ref()) {
+                    (false, Ok(_)) => {
+                        panic!("oracle rejected layout but validator accepted\n  defs: {defs:?}");
+                    }
+                    (true, Err(e)) => {
+                        panic!(
+                            "oracle accepted layout but validator rejected: {e:?}\n  defs: {defs:?}"
+                        );
+                    }
+                    (true, Ok(_)) | (false, Err(_)) => {}
+                }
+            });
+    }
+
+    /// Deterministically decode `N` field definitions from `bytes`, reading
+    /// one 8-byte record per definition.  `bytes` must therefore hold at
+    /// least `8 * N` bytes; a shorter slice panics on indexing.
+    ///
+    /// Record layout:
+    /// * byte 0, low two bits -> field type: 0 and 3 give Mask (3 is folded
+    ///   onto Mask to bias toward the most common case), 1 gives Range,
+    ///   2 gives Bitmask;
+    /// * byte 1, low two bits -> size: 0 and 3 give One, 1 gives Two,
+    ///   2 gives Four;
+    /// * byte 2 -> field_index, byte 3 -> input_index;
+    /// * bytes 4..8 -> offset, as a little-endian `u32`.
+    fn field_defs_from_bytes<const N: usize>(bytes: &[u8]) -> [FieldDef; N] {
+        use crate::acl::field::{FieldSize, FieldType};
+        core::array::from_fn(|slot| {
+            // Each definition owns one 8-byte record.
+            let start = slot * 8;
+            let record = &bytes[start..start + 8];
+            // `& 0b11` yields 0..=3; 0 and 3 both fall through to Mask.
+            let field_type = match record[0] & 0b11 {
+                1 => FieldType::Range,
+                2 => FieldType::Bitmask,
+                _ => FieldType::Mask,
+            };
+            // Likewise 0 and 3 both fall through to One.
+            let size = match record[1] & 0b11 {
+                1 => FieldSize::Two,
+                2 => FieldSize::Four,
+                _ => FieldSize::One,
+            };
+            let field_index = record[2];
+            let input_index = record[3];
+            // The last four bytes of the record are the little-endian offset.
+            let offset = u32::from_le_bytes([record[4], record[5], record[6], record[7]]);
+            FieldDef::new(field_type, size, field_index, input_index, offset)
+        })
+    }
+
+    /// Reference oracle for the property test: returns `true` only when
+    /// every layout check below passes, `false` at the first failure.
+    /// Written from scratch (not copied from `AclBuildConfig::new`) so a
+    /// disagreement with the impl exposes a bug in one or the other.
+    fn oracle_field_defs_valid<const N: usize>(field_defs: &[FieldDef; N]) -> bool {
+        use crate::acl::field::FieldSize;
+
+        if N == 0 || N > MAX_FIELDS {
+            return false;
+        }
+
+        // First field: size must be One and input_index 0 (offset is not checked).
+        let first = &field_defs[0];
+        if !matches!(first.size(), FieldSize::One) {
+            return false;
+        }
+        if first.input_index() != 0 {
+            return false;
+        }
+
+        // Every field_index must be < N and must not repeat.
+        let mut seen = [false; 256];
+        for def in field_defs {
+            let fi = def.field_index() as usize;
+            if fi >= N {
+                return false;
+            }
+            if seen[fi] {
+                return false;
+            }
+            seen[fi] = true;
+        }
+
+        // Per-field extent must fit in u32: neither `offset + size` nor
+        // `offset + 4` may overflow.
+        for def in field_defs {
+            let size_bytes = def.size() as u8 as u32;
+            if def.offset().checked_add(size_bytes).is_none()
+                || def.offset().checked_add(4).is_none()
+            {
+                return false;
+            }
+        }
+
+        // Only the first field may use input_index = 0.
+        for def in &field_defs[1..] {
+            if def.input_index() == 0 {
+                return false;
+            }
+        }
+
+        // Contiguity + intra-group ordering: walk the array, track the
+        // current "open" input_index and the previously-seen offset for
+        // it.  When input_index changes, mark the old one closed; if a
+        // later position uses an already-closed input_index, that's a
+        // non-contiguous group.
+        let mut closed = [false; 256];
+        let mut open: Option<(u8, u32)> = None;
+        for def in field_defs {
+            let ii = def.input_index();
+            let off = def.offset();
+            match open {
+                Some((cur_ii, last_off)) if cur_ii == ii => {
+                    if off <= last_off {
+                        return false;
+                    }
+                    open = Some((ii, off));
+                }
+                Some((cur_ii, _)) => {
+                    closed[cur_ii as usize] = true;
+                    if closed[ii as usize] {
+                        return false;
+                    }
+                    open = Some((ii, off));
+                }
+                None => {
+                    open = Some((ii, off));
+                }
+            }
+        }
+
+        // Each non-first input_index group: per-field extent fits in a
+        // 4-byte window from group_offset, total coverage is exactly
+        // 4 bytes with no overlap.
+        //
+        // `saturating_add` here vs. plain `+` in the impl: the impl
+        // gates this check behind the `FieldExtentOverflow` pre-flight
+        // (offsets where `offset + 4` overflows are already rejected),
+        // so plain `+` in the impl is sound.  The oracle runs the
+        // overflow check earlier and returns `false` on overflow too,
+        // so this branch is only reached for non-overflowing
+        // arithmetic -- but we keep `saturating_add` here as a
+        // defensive fence so a bug in the oracle's overflow check
+        // would not panic this loop while fuzzing.
+        for def in field_defs {
+            let ii = def.input_index();
+            if ii == 0 {
+                continue;
+            }
+            // group_offset = smallest offset among fields sharing this input_index.
+            let group_offset = field_defs
+                .iter()
+                .filter(|d| d.input_index() == ii)
+                .map(|d| d.offset())
+                .min()
+                .expect("group is non-empty by construction");
+            let extent_end = def.offset().saturating_add(def.size() as u8 as u32);
+            if extent_end > group_offset.saturating_add(4) {
+                return false;
+            }
+        }
+        // Coverage / overlap, once per group (at its first array position).
+        // One mask bit per window byte; `size_bits` is really a byte width.
+        for (anchor_idx, anchor) in field_defs.iter().enumerate() {
+            let ii = anchor.input_index();
+            if ii == 0 {
+                continue;
+            }
+            if field_defs[..anchor_idx]
+                .iter()
+                .any(|prev| prev.input_index() == ii)
+            {
+                continue;
+            }
+            let group_offset = field_defs
+                .iter()
+                .filter(|d| d.input_index() == ii)
+                .map(|d| d.offset())
+                .min()
+                .expect("group is non-empty");
+            let mut mask: u8 = 0;
+            for d in field_defs.iter().filter(|d| d.input_index() == ii) {
+                let shift = (d.offset() - group_offset) as u8;
+                let size_bits = d.size() as u8;
+                let field_mask = ((1u8 << size_bits) - 1) << shift;
+                if mask & field_mask != 0 {
+                    return false;
+                }
+                mask |= field_mask;
+            }
+            if mask != 0b1111 {
+                return false;
+            }
+        }
+
+        true
+    }
+}
diff --git a/dpdk/src/acl/context.rs b/dpdk/src/acl/context.rs
new file mode 100644
index 0000000000..5e81675734
--- /dev/null
+++ b/dpdk/src/acl/context.rs
@@ -0,0 +1,1217 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! ACL context with typestate lifecycle management.
+//!
+//! This module provides [`AclContext`], a safe RAII wrapper around DPDK's opaque
+//! [`rte_acl_ctx`][dpdk_sys::rte_acl_ctx] handle.  The context uses a **typestate** pattern to
+//! enforce the correct lifecycle at compile time:
+//!
+//! ```text
+//! AclContext<N, Configuring>  --build()-->  AclContext<N, Built>
+//!          ^                                         |
+//!          +----------------reset()------------------+
+//! ```
+//!
+//! - In the [`Configuring`] state you can add rules ([`add_rules`][AclContext::add_rules]) and
+//!   compile them ([`build`][AclContext::build]).  Mutation methods take `&mut self`, which lets
+//!   the Rust borrow checker enforce DPDK's documented constraint that these operations are **not
+//!   thread-safe**.
+//!
+//! - In the [`Built`] state you can classify packets ([`classify`][AclContext::classify]).
+//!   Classification takes `&self`, which -- combined with the `Sync` implementation -- allows safe
+//!   concurrent access from multiple threads, matching DPDK's documented thread-safety guarantee
+//!   for [`rte_acl_classify`][dpdk_sys::rte_acl_classify].
+//!
+//! The context is parameterised by a const generic `N` (the number of fields per rule).  This
+//! same `N` appears in [`Rule<N>`][super::rule::Rule] and
+//! [`AclBuildConfig<N>`][super::config::AclBuildConfig], so a field-count mismatch between rules
+//! and context is caught at compile time.
+//!
+//! # RAII
+//!
+//! When an [`AclContext`] is dropped (in any state), it calls
+//! [`rte_acl_free`][dpdk_sys::rte_acl_free] to release all DPDK-managed memory.
+//!
+//! # Examples
+//!
+//! See the [module-level documentation][super] for a complete usage example.
+
+use core::fmt;
+use core::mem::ManuallyDrop;
+use core::ptr::NonNull;
+
+use concurrency::sync::{Mutex, OnceLock};
+use errno::Errno;
+use tracing::{debug, error, trace};
+
+use super::classify::ClassifyAlgorithm;
+use super::config::{AclBuildConfig, AclCreateParams};
+use super::error::{
+    AclAddRulesError, AclBuildError, AclClassifyError, AclCreateError, AclSetAlgorithmError,
+};
+use super::field::FieldDef;
+use super::rule::Rule;
+
+/// Process-wide guard for any operation that touches DPDK's global ACL
+/// registry: [`AclContext::new`] (find_existing + create), [`Drop`] for
+/// [`AclContext`] (free), and [`dump_all_contexts`] (list dump).
+///
+/// DPDK's `rte_acl_create` does not itself fail on duplicate names: it
+/// returns the **existing** context pointer for a matching name.  Without
+/// serialization, two threads can both observe
+/// `rte_acl_find_existing -> NULL` for the same name, both call
+/// `rte_acl_create`, and both receive the same pointer -- producing two
+/// [`AclContext`] wrappers that race to free the same DPDK handle on drop.
+/// Holding this mutex across the check-and-create sequence closes the TOCTOU.
+/// Drop and list-dump take the same lock so the "registry-touching
+/// operations are serialized" invariant holds at the wrapper seam.
+///
+/// Why [`OnceLock`] rather than a `static` initializer: under the
+/// `loom`/`shuttle` model-checker backends, `concurrency::sync::Mutex::new`
+/// is not `const fn` (each instance registers with the scheduler), so a
+/// `static M: Mutex<()> = Mutex::new(())` would fail to typecheck on those
+/// configurations.  `OnceLock` + lazy init is the portable idiom across
+/// all backends.  See the module docs on `concurrency::sync`.
+///
+/// Why the concurrency facade rather than [`std::sync::Mutex`] directly:
+/// the workspace policy is poison-as-panic ("poison is a fatal invariant
+/// violation"); the facade applies that policy uniformly so call sites
+/// never see `LockResult`.
+///
+/// # Tracing reentrancy
+///
+/// The lock is **not** reentrant.  Anything that runs while a thread holds
+/// this lock -- including `tracing` layers invoked by the [`debug!`] /
+/// [`error!`] / `#[tracing::instrument]` macros sprinkled through the
+/// surrounding methods -- must not call back into any ACL wrapper API that
+/// would re-acquire it: [`AclContext::new`], [`dump_all_contexts`], or
+/// dropping any [`AclContext`].  Doing so deadlocks the calling thread on
+/// its own previously-acquired guard.  The default `tracing-subscriber`
+/// configuration never touches ACL, but custom layers (e.g. one that
+/// resolves the context name from a registry lookup for log enrichment)
+/// could trip this if added later.
+static ACL_CREATE_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
+
+/// Return the process-wide [`ACL_CREATE_LOCK`] mutex, initializing it on first use.
+fn acl_create_lock() -> &'static Mutex<()> {
+    ACL_CREATE_LOCK.get_or_init(|| Mutex::new(()))
+}
+
+// ---------------------------------------------------------------------------
+// Typestate markers
+// ---------------------------------------------------------------------------
+
+/// Typestate: the context is accepting rule mutations and has not yet been compiled.
+///
+/// Methods available in this state:
+/// - [`add_rules`][AclContext::add_rules] (`&mut self`)
+/// - [`reset_rules`][AclContext::reset_rules] (`&mut self`)
+/// - [`build`][AclContext::build] (consumes `self`, transitions to [`Built`])
+///
+/// Carries the [`AclBuildConfig<N>`] that the context was created with so
+/// that [`add_rules`][AclContext::add_rules] can validate each
+/// [`Rule<N>`]'s field values against the layout (catching e.g. an
+/// out-of-range prefix length before it reaches DPDK's C shift in
+/// `RTE_ACL_MASKLEN_TO_BITMASK`) and [`build`][AclContext::build] can
+/// dispatch with no extra arguments.
+#[derive(Debug, Clone)]
+pub struct Configuring<const N: usize> {
+    config: AclBuildConfig<N>, // layout used by `add_rules` validation and passed to DPDK by `build`
+}
+
+/// Typestate: the context has been compiled and is ready for packet classification.
+///
+/// Methods available in this state:
+/// - [`classify`][AclContext::classify] (`&self`, thread-safe)
+/// - [`classify_with_algorithm`][AclContext::classify_with_algorithm] (`&self`, thread-safe)
+/// - [`set_default_algorithm`][AclContext::set_default_algorithm] (`&mut self`)
+/// - [`reset`][AclContext::reset] (consumes `self`, transitions back to [`Configuring`])
+///
+/// Carries the [`AclBuildConfig<N>`] that produced this build so that
+/// downstream code can query the field layout and category count without
+/// recomputing or re-passing it.  Read via
+/// [`build_config`][AclContext::build_config].
+#[derive(Debug, Clone)]
+pub struct Built<const N: usize> {
+    config: AclBuildConfig<N>, // moved over from `Configuring` by a successful `build`
+}
+
+/// Sealed marker trait for valid [`AclContext`] typestates.
+///
+/// Implemented for [`Configuring<N>`] and [`Built<N>`].
+///
+/// `Send` is a supertrait because [`AclContext`] has a blanket `unsafe
+/// impl<State: AclState> Send`; the supertrait guarantees the state's own
+/// auto-trait obligations are respected (e.g. an internal typestate that
+/// held an `Rc<_>` could not implement `AclState` at all, which is the
+/// desired outcome).
+///
+/// `Sync` is deliberately **not** a supertrait.  Per-state `unsafe impl
+/// Sync` blocks are the single audit gate: adding a new typestate
+/// requires writing a fresh `unsafe impl Sync for AclContext<N, NewState>`
+/// (or omitting it and getting a non-`Sync` context).  A `Sync` supertrait
+/// would mean every state mechanically gains `Sync` just by satisfying
+/// the trait bound, hiding the per-state audit.
+pub trait AclState: sealed::Sealed + Send {}
+
+mod sealed {
+    /// Sealed-trait support for [`super::AclState`].  External crates cannot
+    /// implement this trait, so they cannot add new typestates that would
+    /// pick up the blanket [`Send`] impl on `AclContext` without review.
+    pub trait Sealed {}
+    impl<const N: usize> Sealed for super::Configuring<N> {}
+    impl<const N: usize> Sealed for super::Built<N> {}
+}
+
+impl<const N: usize> AclState for Configuring<N> {}
+impl<const N: usize> AclState for Built<N> {}
+
+// ---------------------------------------------------------------------------
+// Build failure
+// ---------------------------------------------------------------------------
+
+/// Returned when [`AclContext::build`] fails.
+///
+/// Because `build` consumes the [`Configuring`] context, this error wraps
+/// **both** the error description and the original context so the caller can
+/// recover, inspect, or drop it.  The returned context is still in
+/// [`Configuring`] state and **retains any rules previously added via**
+/// [`add_rules`][AclContext::add_rules] -- `build` does not call
+/// `rte_acl_reset_rules` on failure.  Callers who want a clean slate must
+/// invoke [`reset_rules`][AclContext::reset_rules] on the returned context.
+///
+/// # Example
+///
+/// ```ignore
+/// match ctx.build() {
+///     Ok(built) => { /* use built context */ }
+///     Err(failure) => {
+///         eprintln!("build failed: {}", failure.error);
+///         // The original context is still usable; previously-added rules are
+///         // still present.  Clear them if you want to retry from scratch:
+///         let mut ctx = failure.context;
+///         ctx.reset_rules();
+///     }
+/// }
+/// ```
+#[derive(thiserror::Error)]
+#[error("ACL build failed for context '{}'", self.context.name())]
+pub struct AclBuildFailure<const N: usize> {
+    /// Why the build failed; also exposed as this error's `source()`.
+    #[source]
+    pub error: AclBuildError,
+    /// The original context, returned in [`Configuring`] state so it can be reused or dropped.
+    pub context: AclContext<N, Configuring<N>>,
+}
+
+// Hand-rolled Debug because `AclContext` does not derive `Debug` (and
+// embedding the full context state in error logs would be noise).
+impl<const N: usize> fmt::Debug for AclBuildFailure<N> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("AclBuildFailure")
+            .field("error", &self.error)
+            .field("context_name", &self.context.name())
+            .finish()
+    }
+}
+
+// ---------------------------------------------------------------------------
+// AclContext
+// ---------------------------------------------------------------------------
+
+/// A DPDK ACL context parameterised by field count `N` and lifecycle state `State`.
+///
+/// See the [module documentation][self] for an overview of the typestate lifecycle.
+///
+/// # Type parameters
+///
+/// - `N`: the number of fields per rule.  Must match across [`Rule<N>`][super::rule::Rule],
+///   [`AclBuildConfig<N>`][super::config::AclBuildConfig], and this context.
+/// - `State`: one of [`Configuring`] or [`Built`].  Defaults to [`Configuring`] for newly created
+///   contexts.
+pub struct AclContext<const N: usize, State = Configuring<N>> {
+    /// Raw DPDK context handle.  Non-null invariant maintained at all times.
+    ctx: NonNull<dpdk_sys::rte_acl_ctx>,
+    /// The validated parameters that were used to create this context.
+    /// `AclCreateParams<N>` ties the field-count to the context's `N` so a
+    /// mismatch is a compile-time error rather than UB at `rte_acl_add_rules`
+    /// time.
+    params: AclCreateParams<N>,
+    /// Per-state data: both [`Configuring<N>`] and [`Built<N>`] carry the
+    /// [`AclBuildConfig<N>`].  In [`Configuring`] it validates rules at
+    /// `add_rules` time and is what `build` passes to `rte_acl_build`; on
+    /// success that same value is moved (not cloned) into [`Built`].
+    state: State,
+}
+
+// SAFETY: the DPDK ACL context handle is a heap allocation that is not tied to
+// any particular thread, so moving the wrapper across threads is sound as long
+// as the per-state data is `Send` -- which the `Send` supertrait on `AclState`
+// guarantees.  NOTE(review): nothing here bounds `AclCreateParams<N>: Send`; confirm.
+unsafe impl<const N: usize, State: AclState> Send for AclContext<N, State> {}
+
+// `Sync` is **not** blanket.  Each typestate must explicitly opt in with
+// its own `unsafe impl` so that adding a new typestate forces the author
+// to write a fresh reentrancy audit.  A blanket `impl<State: AclState>
+// Sync` would let a new state silently inherit Sync just by implementing
+// the (sealed) `AclState` supertrait -- which would obscure the audit
+// requirement.
+//
+// The load-bearing claim for `Sync` is that **every method on AclContext
+// reachable through a shared `&self` is reentrant** -- i.e. two threads
+// each holding `&self` cannot race against each other through any safe
+// API.  `Sync` already follows tautologically from `&mut self` discipline
+// on the mutation methods; the non-trivial claim is what the `&self`
+// methods do.
+//
+// `&self` methods reachable in any state ("all-states" impl on
+// `AclContext<N, State>`) and their reentrancy story:
+// - `name()`, `params()`, `as_raw_ptr()`, and the `Debug`/`Display` impls --
+//   read-only access to immutable wrapper fields.  Trivially reentrant.
+//
+// `dump()` is explicitly **not** in the `&self` set: it takes `&mut self`,
+// which sidesteps any reentrancy claim against `rte_acl_dump`'s
+// implementation details.  Even though the current DPDK source only reads
+// from the context inside `rte_acl_dump`, the `&mut self` borrow makes
+// the argument robust against any future DPDK change that adds caching
+// or other mutation inside the dump path.  See the `dump` doc for the
+// rationale.  Listing `dump` here would be a documentation lie that
+// could mislead a future reviewer into believing the `&self` Sync claim
+// covered it.
+//
+// Cross-context registry mutation (Drop and `dump_all_contexts`) is
+// protected by [`ACL_CREATE_LOCK`] at the Rust seam, so it does not
+// participate in the per-context `&self` reentrancy story.
+
+// Sync impl for the [`Configuring<N>`] state.
+//
+// `&self` methods reachable here are the all-states ones above (`name`,
+// `params`, `as_raw_ptr`) plus the `Configuring`-only `build_config`, a
+// read-only borrow of the stored config.  All rule mutation, `dump`, and
+// `build` take `&mut self` / consume `self`, which `Sync` does not concern.
+unsafe impl<const N: usize> Sync for AclContext<N, Configuring<N>> {}
+
+// Sync impl for the [`Built<N>`] state.
+//
+// In addition to the all-states `&self` methods, `Built<N>` exposes
+// `classify` / `classify_with_algorithm` (DPDK documents these as
+// thread-safe) and `build_config`/`num_categories`/`field_defs`
+// (read-only accessors into the stored config).  All reentrant.
+unsafe impl<const N: usize> Sync for AclContext<N, Built<N>> {}
+
+// ---------------------------------------------------------------------------
+// Methods available in ALL states
+// ---------------------------------------------------------------------------
+
+impl<const N: usize, State> AclContext<N, State> {
+    /// Get the context name (as passed to [`AclCreateParams::new`]).
+    #[must_use]
+    #[inline]
+    pub fn name(&self) -> &str {
+        self.params.name()
+    }
+
+    /// Get the creation parameters.
+    #[must_use]
+    #[inline]
+    pub fn params(&self) -> &AclCreateParams<N> {
+        &self.params
+    }
+
+    /// Get the raw DPDK context pointer for read-only FFI.
+    ///
+    /// Returning a raw pointer is itself a safe operation; *using* the pointer
+    /// in any FFI call is what is unsafe, and that obligation already lives on
+    /// those FFI signatures.  Mirrors the safety story of
+    /// [`NonNull::as_ptr`] and similar std accessors.
+    ///
+    /// For DPDK calls that take `*mut rte_acl_ctx` (e.g. `rte_acl_add_rules`,
+    /// `rte_acl_reset`, `rte_acl_set_ctx_classify`), use
+    /// [`as_raw_mut_ptr`][AclContext::as_raw_mut_ptr] instead so the
+    /// `&mut self` requirement carries the typestate's mutability discipline
+    /// into raw FFI code.
+    ///
+    /// # Lifetime
+    ///
+    /// The returned pointer is valid only while `self` is alive.  Raw pointers
+    /// in Rust do **not** carry lifetimes, so the borrow checker will not catch
+    /// use-after-free of this pointer past a [`Drop`] of the context.  Treat
+    /// the result as borrowed from `&self`: pass it straight to the FFI call
+    /// and do not hold it across moves or drops of the context.
+    #[must_use]
+    #[inline]
+    pub fn as_raw_ptr(&self) -> *const dpdk_sys::rte_acl_ctx {
+        self.ctx.as_ptr()
+    }
+
+    /// Get the raw DPDK context pointer for mutating FFI.
+    ///
+    /// Taking `&mut self` mirrors the typestate's mutability discipline: a
+    /// caller cannot obtain a `*mut rte_acl_ctx` from a shared borrow of the
+    /// context, preventing data races between concurrent
+    /// `rte_acl_classify` (which takes `&self`) and any mutating FFI call
+    /// the caller might make through this pointer.
+    ///
+    /// See [`as_raw_ptr`][AclContext::as_raw_ptr] for the lifetime caveat
+    /// (raw pointers do not carry lifetimes in Rust; treat this one as
+    /// borrowed from `&mut self`).
+    #[must_use]
+    #[inline]
+    pub fn as_raw_mut_ptr(&mut self) -> *mut dpdk_sys::rte_acl_ctx {
+        self.ctx.as_ptr()
+    }
+
+    /// Dump the context's internal state to stdout via
+    /// [`rte_acl_dump`][dpdk_sys::rte_acl_dump].
+    ///
+    /// This is a debugging aid.  Output goes to stdout and is not captured
+    /// by the tracing subsystem.  Under `cargo nextest`, stdout is captured
+    /// per test and only surfaced on failure or with `--no-capture`; under
+    /// `cargo test`, stdout is captured by default unless `--nocapture` is
+    /// passed.  Either way, the output will not appear in the tracing
+    /// stream -- redirect or run the harness with capture disabled if you
+    /// need to read it interactively.
+    ///
+    /// # `&mut self`
+    ///
+    /// Takes `&mut self` rather than `&self` even though
+    /// [`rte_acl_dump`][dpdk_sys::rte_acl_dump] is read-only on the
+    /// current DPDK source.  The exclusive borrow side-steps a
+    /// pin-to-DPDK-version reentrancy audit: any future change to DPDK
+    /// that adds caching inside `rte_acl_dump` would silently invalidate
+    /// a `&self` claim, but cannot affect `&mut self` (no other thread
+    /// has access to the context for the duration of the call).
+    #[cold]
+    pub fn dump(&mut self) {
+        // SAFETY: rte_acl_dump operates on the single context pointed
+        // at by `self.ctx` and does not touch the global registry, so
+        // no ACL_CREATE_LOCK acquisition is required.  The `&mut self`
+        // borrow guarantees we have exclusive access to this context,
+        // covering any future DPDK change that adds mutation inside
+        // `rte_acl_dump`.
+        unsafe { dpdk_sys::rte_acl_dump(self.ctx.as_ptr()) }
+    }
+
+    /// Decompose the context into its raw parts **without** running the destructor.
+    ///
+    /// Used internally to implement typestate transitions: the raw pointer,
+    /// params, and per-state data are moved out, and [`ManuallyDrop`] prevents
+    /// the old value's [`Drop`] from freeing the DPDK handle.
+    fn into_parts(self) -> (NonNull<dpdk_sys::rte_acl_ctx>, AclCreateParams<N>, State) {
+        let this = ManuallyDrop::new(self);
+        let ctx = this.ctx;
+        // SAFETY: `this` is wrapped in ManuallyDrop, so its Drop will not run
+        // and its fields are never dropped in place.  Each `ptr::read` below
+        // therefore produces the sole owner of the value it moves out (`ctx`
+        // above is a plain `Copy` of the `NonNull`).
+        let params = unsafe { core::ptr::read(&this.params) };
+        // SAFETY: same reasoning as the params move above -- `this` is
+        // ManuallyDrop, so reading `state` produces a single owner.
+        let state = unsafe { core::ptr::read(&this.state) };
+        (ctx, params, state)
+    }
+}
+
+impl<const N: usize, State> fmt::Debug for AclContext<N, State> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut dbg = f.debug_struct("AclContext");
+        dbg.field("name", &self.name());
+        dbg.field("num_fields", &N);
+        dbg.field("ptr", &self.ctx);
+        dbg.finish()
+    }
+}
+
+impl<const N: usize, State> fmt::Display for AclContext<N, State> {
+    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
+        out.write_fmt(format_args!("AclContext<{N}>({:?})", self.name()))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Configuring state
+// ---------------------------------------------------------------------------
+
+impl<const N: usize> AclContext<N, Configuring<N>> {
+    /// Create a new ACL context in the [`Configuring`] state.
+    ///
+    /// This is a safe wrapper around [`rte_acl_create`][dpdk_sys::rte_acl_create].
+    ///
+    /// # Arguments
+    ///
+    /// * `params` -- validated creation parameters (see [`AclCreateParams::new`]).
+    /// * `config` -- validated build parameters (see
+    ///   [`AclBuildConfig::new`]).  The context retains the config for the
+    ///   lifetime of the [`Configuring`] state and uses it to validate
+    ///   [`Rule<N>`] values at [`add_rules`][AclContext::add_rules] time and
+    ///   to dispatch [`build`][AclContext::build] without re-supplying it.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`AclCreateError`] if DPDK fails to allocate the context.  This
+    /// includes the case where the DPDK EAL has not been initialized:
+    /// `rte_acl_create` returns NULL with `rte_errno` set, which surfaces as
+    /// [`AclCreateError::InvalidParams`] or [`AclCreateError::Unknown`].  The
+    /// failure is graceful -- this is a regular error path, not undefined
+    /// behavior.
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(params, config), fields(name = params.name()))]
+    pub fn new(
+        params: AclCreateParams<N>,
+        config: AclBuildConfig<N>,
+    ) -> Result<Self, AclCreateError> {
+        // Hold the process-wide registry lock (see [`ACL_CREATE_LOCK`]) for
+        // the whole probe-then-create sequence.  `rte_acl_create` hands back
+        // the *existing* context for a duplicate name, so two unserialized
+        // callers could both see "not found", both create, and end up with
+        // two wrappers that race to free one handle on drop.
+        //
+        // The concurrency facade panics on a poisoned lock instead of
+        // returning a `LockResult`.  That is deliberate: a holder that
+        // panicked while touching the registry may have left it in an
+        // unknown state, and continuing silently could lead to
+        // use-after-free.
+        let _create_guard = acl_create_lock().lock();
+
+        // Pre-flight duplicate-name probe: refuse rather than silently alias
+        // a context that is already registered.
+        //
+        // SAFETY: name_cstr returns a valid, NUL-terminated C string borrowed
+        // from `params`; `rte_acl_find_existing` only reads through that
+        // pointer and does not retain it.
+        let found = unsafe { dpdk_sys::rte_acl_find_existing(params.name_cstr().as_ptr()) };
+        if !found.is_null() {
+            let name = params.name().to_owned();
+            error!(
+                "rte_acl_find_existing found context '{}' already registered",
+                name,
+            );
+            return Err(AclCreateError::AlreadyExists { name });
+        }
+
+        // SAFETY: `raw_params` borrows from `params`, which lives on this
+        // stack frame through the call, so the contained `name` pointer is
+        // valid for the duration of `rte_acl_create`.
+        let raw_params = params.to_raw();
+        let ctx_ptr = unsafe { dpdk_sys::rte_acl_create(raw_params.as_ptr()) };
+
+        // A NULL handle means the create failed; translate `rte_errno` into
+        // the matching `AclCreateError` and bail out.
+        let Some(ctx) = NonNull::new(ctx_ptr) else {
+            // SAFETY: the call takes no arguments, so there is no pointer
+            // validity to uphold; it is read right after the failed create,
+            // before any other DPDK call is made.
+            let rte_errno = unsafe { dpdk_sys::rte_errno_get() };
+            error!(
+                "rte_acl_create failed for '{}': rte_errno = {rte_errno}",
+                params.name(),
+            );
+            let failure = match rte_errno {
+                errno::EINVAL => AclCreateError::InvalidParams,
+                errno::ENOMEM => AclCreateError::OutOfMemory,
+                other => AclCreateError::Unknown(Errno(other)),
+            };
+            return Err(failure);
+        };
+
+        // Success path: record the new handle's address and sizing parameters.
+        debug!(
+            "Created ACL context '{}' at {:p} (rule_size={}, max_rules={})",
+            params.name(),
+            ctx_ptr,
+            params.rule_size(),
+            params.max_rule_num(),
+        );
+
+        // Fresh contexts always start in the `Configuring` state, carrying
+        // the caller's build config for later `add_rules` / `build` calls.
+        Ok(Self {
+            ctx,
+            params,
+            state: Configuring { config },
+        })
+    }
+
+    /// Borrow the [`AclBuildConfig<N>`] this context was created with.
+    ///
+    /// This is the config `add_rules` validates against and `build` forwards
+    /// to DPDK; the [`Built`] state exposes an accessor of the same name.
+    #[must_use]
+    #[inline]
+    pub fn build_config(&self) -> &AclBuildConfig<N> {
+        &self.state.config
+    }
+
+    /// Add rules to the context.
+    ///
+    /// This is a safe wrapper around [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules].
+    ///
+    /// Takes `&mut self` because DPDK documents this operation as **not thread-safe**.
+    ///
+    /// # Arguments
+    ///
+    /// * `rules` -- a slice of [`Rule<N>`] to add.  Each rule must have its fields in the same
+    ///   order as the [`FieldDef`]s that will be used at build time.
+    ///   All field values must be in **host byte order**.
+    ///
+    /// Each rule is validated against this context's [`AclBuildConfig<N>`]
+    /// (the one passed to [`AclContext::new`]) before being handed to
+    /// `rte_acl_add_rules`.  In particular, a
+    /// [`FieldType::Mask`][super::field::FieldType::Mask] field whose
+    /// `mask_range` (interpreted as a prefix length) exceeds the field's
+    /// bit width is rejected here -- if it were forwarded to DPDK, the
+    /// `RTE_ACL_MASKLEN_TO_BITMASK` macro would perform a C shift by
+    /// `>= 8 * size`, which is undefined behaviour.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`AclAddRulesError`] when a rule fails wrapper-side validation
+    /// ([`AclAddRulesError::InvalidRule`], which carries the offending
+    /// rule's index in the slice) or when DPDK itself rejects the rules
+    /// (e.g. the context is full or the rules are invalid).  DPDK does
+    /// **not** report which rule it rejected; the wrapper-side check
+    /// catches the soundness-critical cases up-front, and for other
+    /// rejections you may need to bisect by submitting smaller
+    /// sub-slices.
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(self, rules), fields(name = self.name(), count = rules.len()))]
+    pub fn add_rules(&mut self, rules: &[Rule<N>]) -> Result<(), AclAddRulesError> {
+        if rules.is_empty() {
+            debug!("add_rules called with empty slice -- no-op");
+            return Ok(());
+        }
+
+        // Vet every rule against this context's AclBuildConfig before DPDK
+        // sees any of them (e.g. an out-of-range prefix length for a Mask
+        // field).  The first offender aborts the call and is reported by its
+        // index in `rules`.
+        for (rule_index, rule) in rules.iter().enumerate() {
+            if let Err(source) = rule.validate(&self.state.config) {
+                return Err(AclAddRulesError::InvalidRule { rule_index, source });
+            }
+        }
+
+        // The DPDK API takes the rule count as a u32.
+        let Ok(num) = u32::try_from(rules.len()) else {
+            error!("Rule count {} exceeds u32::MAX", rules.len());
+            return Err(AclAddRulesError::TooManyRules { len: rules.len() });
+        };
+
+        // SAFETY:
+        // - `Rule<N>` is #[repr(C)] with identical layout to `RTE_ACL_RULE_DEF(_, N)`.
+        //   `rte_acl_rule` is the flexible-array "base" struct, and the
+        //   `rule_size` given at context creation tells DPDK the real stride.
+        // - `rules` is a live slice, so the pointer is valid for `num`
+        //   consecutive `Rule<N>` elements.
+        // - `&mut self` gives this call exclusive access to the context.
+        let first = rules.as_ptr().cast::<dpdk_sys::rte_acl_rule>();
+        let ret = unsafe { dpdk_sys::rte_acl_add_rules(self.ctx.as_ptr(), first, num) };
+
+        // Zero means success; any other return value is mapped to an error below.
+        if ret == 0 {
+            debug!("Added {num} rules to ACL context '{}'", self.name(),);
+            return Ok(());
+        }
+
+        error!(
+            "rte_acl_add_rules failed for '{}': ret = {ret}",
+            self.name(),
+        );
+        Err(match ret {
+            errno::NEG_ENOMEM => AclAddRulesError::OutOfMemory,
+            errno::NEG_EINVAL => AclAddRulesError::InvalidParams,
+            other => AclAddRulesError::Unknown(Errno(other)),
+        })
+    }
+
+    /// Delete all rules from the context without destroying compiled runtime structures.
+    ///
+    /// Safe wrapper around [`rte_acl_reset_rules`][dpdk_sys::rte_acl_reset_rules].
+    ///
+    /// Takes `&mut self` because DPDK documents this operation as **not thread-safe**.
+    ///
+    /// # `reset_rules` vs [`reset`][AclContext::reset]
+    ///
+    /// The two reset entry points are distinguished by the state they
+    /// operate on:
+    ///
+    /// | method | available in | takes | clears rules | clears compiled structures | state after |
+    /// |--------|--------------|-------|--------------|----------------------------|-------------|
+    /// | `reset_rules` | [`Configuring`] | `&mut self` | yes | no (no compiled structures exist yet) | [`Configuring`] (unchanged) |
+    /// | [`reset`][AclContext::reset] | [`Built`] | `self` (consumes) | yes | yes (calls `rte_acl_reset`) | [`Configuring`] |
+    ///
+    /// Both keep the [`AclBuildConfig<N>`] that was originally supplied to
+    /// [`AclContext::new`]; the next [`build`][AclContext::build] takes no
+    /// config argument.  To switch to a different field layout, drop the
+    /// context and create a new one with the new config.
+    ///
+    /// The shape difference (`&mut self` vs consuming) is forced by the
+    /// typestate transition: `reset` changes the type, so it must consume
+    /// the value; `reset_rules` keeps the same type and so can mutate in
+    /// place.
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))]
+    pub fn reset_rules(&mut self) {
+        // SAFETY: `self.ctx` is the non-null handle held by this wrapper, and
+        // `&mut self` guarantees exclusive access to it for the duration of
+        // the `rte_acl_reset_rules` call.
+        unsafe { dpdk_sys::rte_acl_reset_rules(self.ctx.as_ptr()) };
+        debug!("Reset rules for ACL context '{}'", self.name());
+    }
+
+    /// Compile the rules into optimized runtime lookup structures.
+    ///
+    /// Safe wrapper around [`rte_acl_build`][dpdk_sys::rte_acl_build].  The
+    /// build config supplied to [`AclContext::new`] is forwarded to DPDK
+    /// here; this method takes no config argument.
+    ///
+    /// On success, the context transitions from [`Configuring`] to [`Built`] and is ready for
+    /// packet classification.
+    ///
+    /// On failure, the original context is returned inside [`AclBuildFailure`] so that the caller
+    /// can recover, inspect, or drop it.  The rules remain intact; the caller may adjust rules
+    /// and try again.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`AclBuildFailure`] (an [`AclBuildError`] plus the context) if `rte_acl_build` fails.
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))]
+    pub fn build(self) -> Result<AclContext<N, Built<N>>, AclBuildFailure<N>> {
+        let raw_cfg = self.state.config.to_raw();
+
+        // SAFETY: `self` is held by value, so nothing else can use `self.ctx` during the call;
+        // `raw_cfg` is a stack-local copy with no dangling pointers that lives through the call.
+        let ret = unsafe { dpdk_sys::rte_acl_build(self.ctx.as_ptr(), &raw_cfg) };
+
+        if ret != 0 {
+            error!("rte_acl_build failed for '{}': ret = {ret}", self.name(),);
+            let error = match ret {
+                errno::NEG_ENOMEM => AclBuildError::OutOfMemory,
+                errno::NEG_EINVAL => AclBuildError::InvalidConfig,
+                errno::NEG_ERANGE => AclBuildError::ExceededMaxSize,
+                other => AclBuildError::Unknown(Errno(other)),
+            };
+            return Err(AclBuildFailure {
+                error,
+                context: self,
+            });
+        }
+
+        debug!("Built ACL context '{}'", self.name());
+
+        // Transition: Configuring -> Built.  The config is moved out of the
+        // Configuring state and into Built without a clone -- both states
+        // hold the same logical artifact.
+        let (ctx, params, old_state) = self.into_parts();
+        Ok(AclContext {
+            ctx,
+            params,
+            state: Built {
+                config: old_state.config,
+            },
+        })
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Built state
+// ---------------------------------------------------------------------------
+
+impl<const N: usize> AclContext<N, Built<N>> {
+    /// Borrow the [`AclBuildConfig<N>`] used to compile this context.
+    ///
+    /// Lets classify-time code read the field layout (offsets, sizes), the
+    /// number of categories, or the minimum input size without threading the
+    /// config through the call chain.
+    #[must_use]
+    #[inline]
+    pub fn build_config(&self) -> &AclBuildConfig<N> {
+        &self.state.config
+    }
+
+    /// Number of categories this context was built with (upper bound for classify's `categories`).
+    ///
+    /// Shorthand for `self.build_config().num_categories()`.
+    #[must_use]
+    #[inline]
+    pub fn num_categories(&self) -> u32 {
+        self.state.config.num_categories()
+    }
+
+    /// Borrow the `N` field definitions (one [`FieldDef`] per field) used at build time.
+    ///
+    /// Shorthand for `self.build_config().field_defs()`.
+    #[must_use]
+    #[inline]
+    pub fn field_defs(&self) -> &[FieldDef; N] {
+        self.state.config.field_defs()
+    }
+
+    /// Classify input data buffers against the compiled rules.
+    ///
+    /// This is the **hot-path** function and the primary reason the ACL context exists.
+    /// It is a thin wrapper around [`rte_acl_classify`][dpdk_sys::rte_acl_classify];
+    /// the function is `unsafe` because the per-pointer buffer-size precondition
+    /// cannot be expressed in the type system (see the `# Safety` section below).
+    ///
+    /// Takes `&self` because DPDK documents classification as **thread-safe**.  An
+    /// `Arc<AclContext<N, Built>>` can be shared across threads for concurrent classification.
+    ///
+    /// # Arguments
+    ///
+    /// * `data` -- array of pointers to input data buffers.  Each pointer should reference the
+    ///   first byte of the region described by the [`FieldDef`] offsets.
+    ///   All fields in the input buffers must be in **network byte order** (MSB).
+    /// * `results` -- output array to receive match results.  Must have at least
+    ///   `data.len() * categories` elements.  Each result is either `0` (no match) or the
+    ///   `userdata` value of the highest-priority matching rule for that (buffer, category) pair.
+    /// * `categories` -- number of match categories.  Must be between 1 and
+    ///   [`MAX_CATEGORIES`][super::config::MAX_CATEGORIES] (inclusive), and either 1 or a multiple
+    ///   of [`RESULTS_MULTIPLIER`][super::config::RESULTS_MULTIPLIER].
+    ///
+    /// # Errors
+    ///
+    /// Returns [`AclClassifyError::InvalidArgs`] if:
+    /// - The `results` slice is too small for `data.len() * categories` entries.
+    /// - `data.len()` exceeds `u32::MAX`.
+    /// - `categories` is zero, exceeds [`MAX_CATEGORIES`][super::config::MAX_CATEGORIES],
+    ///   is not `1` or a multiple of [`RESULTS_MULTIPLIER`][super::config::RESULTS_MULTIPLIER],
+    ///   or exceeds the [`num_categories`][super::config::AclBuildConfig::num_categories]
+    ///   the context was built with.
+    ///
+    /// DPDK's own `-EINVAL` also maps to `InvalidArgs`; any other non-zero return is `Unknown`.
+    ///
+    /// # Safety
+    ///
+    /// Every pointer in `data` must be valid for reads of at least
+    /// [`AclBuildConfig::min_input_size`][super::config::AclBuildConfig::min_input_size]
+    /// bytes, where the build config is the one returned by
+    /// [`build_config`][AclContext::build_config].  DPDK reads from those
+    /// buffers without bounds checks and a dangling, null, or too-small
+    /// pointer is undefined behavior.
+    ///
+    /// The bound can be **wider** than `max(field.offset + field.size)`: DPDK's
+    /// classify loop performs 4-byte aligned loads where each load's
+    /// starting offset is the **lowest `FieldDef.offset` within an
+    /// `input_index` group** (this is what DPDK's `data_index` is built
+    /// from at `rte_acl_build` time).  Concretely,
+    /// [`min_input_size`][super::config::AclBuildConfig::min_input_size]
+    /// returns `max(group_offset + 4)` across all `input_index` groups in
+    /// the field-def array, which is the upper bound on the byte offset
+    /// DPDK may read from.  The grouping validation in
+    /// [`AclBuildConfig::new`][super::config::AclBuildConfig::new]
+    /// guarantees this is at least `max(field.offset + field.size)`, so
+    /// callers do not need to also account for the per-field extent
+    /// separately.
+    ///
+    /// The data array itself is read-only.  bindgen generates `data: *mut *const
+    /// u8` (the loose C signature is `const uint8_t **`), but DPDK only reads
+    /// the array: `acl_set_flow` in `lib/acl/acl_run.h` stores the pointer
+    /// once, and the only access site dereferences `flows->data[i]` for read.
+    /// The `.cast_mut()` below is a type accommodation for the bindgen signature
+    /// and does not license writes through it.
+    ///
+    /// A future safe wrapper could enforce this statically via `&[&[u8; STRIDE]]`
+    /// where `STRIDE` is derived from the field layout; deferred until a
+    /// concrete consumer demonstrates the shape it wants.
+    #[inline]
+    pub unsafe fn classify(
+        &self,
+        data: &[*const u8],
+        results: &mut [u32],
+        categories: u32,
+    ) -> Result<(), AclClassifyError> {
+        let num = self.validate_classify_args(data, results, categories)?;
+
+        // SAFETY:
+        // - data and results slice lengths have been validated.
+        // - The pointer validity precondition on the individual buffers is
+        //   forwarded to our caller via the `unsafe fn` signature.
+        // - The `.cast_mut()` is sound because DPDK only reads the data
+        //   array (see the # Safety section above for the source citation).
+        let ret = unsafe {
+            dpdk_sys::rte_acl_classify(
+                self.ctx.as_ptr(),
+                data.as_ptr().cast_mut(),
+                results.as_mut_ptr(),
+                num,
+                categories,
+            )
+        };
+
+        if ret != 0 {
+            trace!(
+                "rte_acl_classify returned {ret} for context '{}'",
+                self.name(),
+            );
+            return Err(match ret {
+                errno::NEG_EINVAL => AclClassifyError::InvalidArgs,
+                other => AclClassifyError::Unknown(Errno(other)),
+            });
+        }
+
+        Ok(())
+    }
+
+    /// Classify input data buffers using a specific SIMD algorithm.
+    ///
+    /// Identical to [`classify`][AclContext::classify] except that the caller explicitly selects
+    /// the classification algorithm instead of using the context's default.
+    ///
+    /// Thin wrapper around
+    /// [`rte_acl_classify_alg`][fn@dpdk_sys::rte_acl_classify_alg], except
+    /// when `algorithm == ClassifyAlgorithm::Default`: see the
+    /// "[`Default`][ClassifyAlgorithm::Default] is special" note below.
+    ///
+    /// # `Default` is special
+    ///
+    /// `rte_acl_classify_alg(ctx, ..., RTE_ACL_CLASSIFY_DEFAULT)` dispatches
+    /// table slot 0 in DPDK's classify dispatch table, which is the
+    /// **scalar** implementation -- not "DPDK's best available".  Only
+    /// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify]
+    /// expands `Default` to the best available variant on the current CPU.
+    /// To honour the "use the context's default algorithm" intent without
+    /// silently forcing scalar, this wrapper dispatches through
+    /// [`rte_acl_classify`] (which uses the context's currently-set
+    /// algorithm) when `algorithm == ClassifyAlgorithm::Default`.  Any
+    /// other variant goes directly to `rte_acl_classify_alg`.
+    ///
+    /// [`rte_acl_classify`]: dpdk_sys::rte_acl_classify
+    ///
+    /// # Arguments
+    ///
+    /// See [`classify`][AclContext::classify] for `data`, `results`, and `categories`.
+    ///
+    /// * `algorithm` -- the SIMD implementation to use for this call.  The caller
+    ///   is responsible for ensuring the selected algorithm is supported on the
+    ///   current CPU; see the `# Safety` section below.
+    ///
+    /// # Errors
+    ///
+    /// Same as [`classify`][AclContext::classify], plus
+    /// [`AclClassifyError::NotSupported`] if the underlying
+    /// `rte_acl_classify_alg` returns `-ENOTSUP` (typically because the selected
+    /// slot is a stub for a SIMD variant this DPDK build does not include).
+    ///
+    /// # Safety
+    ///
+    /// Same pointer-validity precondition as [`classify`][AclContext::classify], plus:
+    ///
+    /// `algorithm` must be implemented and runnable on the current CPU.
+    /// Unlike
+    /// [`set_default_algorithm`][AclContext::set_default_algorithm] (which
+    /// delegates to `rte_acl_set_ctx_classify`, which in turn validates against
+    /// the per-CPU capability table before installing the algorithm),
+    /// [`rte_acl_classify_alg`][fn@dpdk_sys::rte_acl_classify_alg] does
+    /// **not** pre-check feature support; it dispatches straight through
+    /// the classify function-pointer table.  Selecting a real SIMD variant
+    /// that the host does not implement therefore executes unsupported
+    /// instructions (SIGILL or silent corruption) rather than returning an
+    /// error.
+    ///
+    /// `ClassifyAlgorithm::Scalar` is always safe.
+    /// `ClassifyAlgorithm::Default` is also safe and is routed through
+    /// `rte_acl_classify` (see the "`Default` is special" section above), so
+    /// it picks up whatever variant `set_default_algorithm` previously
+    /// vetted.  Every other variant requires the caller to confirm CPU
+    /// support out-of-band (e.g. via `is_x86_feature_detected!` or
+    /// `std::arch::is_aarch64_feature_detected!`).
+    ///
+    /// Note that an unsupported-but-stubbed-out slot (DPDK ships placeholder
+    /// stubs for some entries on builds where the SIMD codepath was
+    /// disabled) will return `-ENOTSUP` through the FFI, surfacing as
+    /// [`AclClassifyError::NotSupported`] -- the unsafe contract is about
+    /// the case where the slot is a real, non-stub SIMD entry whose
+    /// instructions the CPU cannot execute.
+    #[inline]
+    pub unsafe fn classify_with_algorithm(
+        &self,
+        data: &[*const u8],
+        results: &mut [u32],
+        categories: u32,
+        algorithm: ClassifyAlgorithm,
+    ) -> Result<(), AclClassifyError> {
+        // See doc comment: `Default` through `rte_acl_classify_alg` would
+        // pin table slot 0 (scalar) rather than "the context's default".
+        // Dispatch through `rte_acl_classify` instead so the call honours
+        // whatever the context was last configured with.  (Argument
+        // validation runs once, inside the delegated `classify`.)
+        if matches!(algorithm, ClassifyAlgorithm::Default) {
+            // SAFETY: same as classify; caller upholds the pointer validity
+            // precondition.
+            return unsafe { self.classify(data, results, categories) };
+        }
+
+        let num = self.validate_classify_args(data, results, categories)?;
+
+        // SAFETY: same as classify; `algorithm` maps to a valid rte_acl_classify_alg constant
+        // by construction, and CPU support for it is the caller's obligation (see `# Safety`).
+        let ret = unsafe {
+            dpdk_sys::rte_acl_classify_alg(
+                self.ctx.as_ptr(),
+                data.as_ptr().cast_mut(),
+                results.as_mut_ptr(),
+                num,
+                categories,
+                algorithm.into(),
+            )
+        };
+
+        if ret != 0 {
+            trace!(
+                "rte_acl_classify_alg({algorithm}) returned {ret} for context '{}'",
+                self.name(),
+            );
+            return Err(match ret {
+                errno::NEG_EINVAL => AclClassifyError::InvalidArgs,
+                errno::NEG_ENOTSUP => AclClassifyError::NotSupported,
+                other => AclClassifyError::Unknown(Errno(other)),
+            });
+        }
+
+        Ok(())
+    }
+
+    /// Set the default classification algorithm for future calls to
+    /// [`classify`][AclContext::classify].
+    ///
+    /// This is a safe wrapper around
+    /// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify].
+    ///
+    /// Takes `&mut self` because DPDK takes a `*mut rte_acl_ctx`, indicating the context is
+    /// mutated.  Requiring exclusive access prevents data races with concurrent
+    /// [`classify`][AclContext::classify] calls.
+    ///
+    /// # Interaction with [`Arc`][std::sync::Arc]
+    ///
+    /// The `&mut self` requirement means a context that has been wrapped in
+    /// [`Arc`][std::sync::Arc] (the typical pattern for sharing a
+    /// [`Built<N>`] context across classification threads) is no longer
+    /// reachable for `set_default_algorithm`.  Call this **before** wrapping
+    /// the context in an `Arc`, or use
+    /// [`classify_with_algorithm`][AclContext::classify_with_algorithm] to
+    /// override the algorithm on individual calls without mutating the
+    /// shared context.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`AclSetAlgorithmError::NotSupported`] or [`AclSetAlgorithmError::InvalidParams`]
+    /// for DPDK's `-ENOTSUP` / `-EINVAL`, and [`AclSetAlgorithmError::Unknown`] for any other code.
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))]
+    pub fn set_default_algorithm(
+        &mut self,
+        algorithm: ClassifyAlgorithm,
+    ) -> Result<(), AclSetAlgorithmError> {
+        // SAFETY: `&mut self` gives exclusive access to the context, and `algorithm.into()`
+        // yields a valid rte_acl_classify_alg constant by construction.
+        let ret =
+            unsafe { dpdk_sys::rte_acl_set_ctx_classify(self.ctx.as_ptr(), algorithm.into()) };
+
+        if ret != 0 {
+            error!(
+                "rte_acl_set_ctx_classify({algorithm}) failed for '{}': ret = {ret}",
+                self.name(),
+            );
+            return Err(match ret {
+                errno::NEG_EINVAL => AclSetAlgorithmError::InvalidParams,
+                errno::NEG_ENOTSUP => AclSetAlgorithmError::NotSupported,
+                other => AclSetAlgorithmError::Unknown(Errno(other)),
+            });
+        }
+
+        debug!(
+            "Set default classify algorithm to {algorithm} for ACL context '{}'",
+            self.name(),
+        );
+        Ok(())
+    }
+
+    /// Reset the context, clearing **both** rules and compiled runtime
+    /// structures, and transition back to the [`Configuring`] state.
+    ///
+    /// Safe wrapper around [`rte_acl_reset`][dpdk_sys::rte_acl_reset].  The
+    /// [`AclBuildConfig<N>`] is retained (it lives on the [`Configuring`]
+    /// state just as on [`Built<N>`]), so the next
+    /// [`build`][AclContext::build] requires no fresh config argument.  If
+    /// the caller wants to switch to a different field layout, they should
+    /// drop the context and construct a new one with the desired config.
+    ///
+    /// See [`reset_rules`][AclContext::reset_rules] for the matching method on
+    /// [`Configuring`] contexts and a comparison table.
+    ///
+    /// The returned context has no rules and no compiled structures but
+    /// keeps the same field layout as before, so it is ready for new rules
+    /// to be added via [`add_rules`][AclContext::add_rules].
+    #[cold]
+    #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))]
+    pub fn reset(self) -> AclContext<N, Configuring<N>> {
+        // SAFETY: rte_acl_reset mutates only the context pointed at by
+        // `self.ctx`; consuming `self` by value guarantees no other
+        // reference to this context can be in use.
+        unsafe { dpdk_sys::rte_acl_reset(self.ctx.as_ptr()) };
+
+        debug!("Reset ACL context '{}'", self.name());
+
+        // Transition: Built -> Configuring.  Carry the config forward; the
+        // post-reset context still describes the same field layout.
+        let (ctx, params, old_state) = self.into_parts();
+        AclContext {
+            ctx,
+            params,
+            state: Configuring {
+                config: old_state.config,
+            },
+        }
+    }
+
+    /// Validate the arguments common to both classify methods.
+    ///
+    /// Returns `data.len()` as a `u32` (the FFI `num` argument) on success.
+    ///
+    /// `categories` is checked against DPDK's documented bounds **before** we
+    /// hand it to FFI.  DPDK uses `categories` to index into per-thread runtime
+    /// arrays sized to [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES],
+    /// so out-of-bound values can overflow C-side state and are not safe to
+    /// forward.
+    #[inline]
+    fn validate_classify_args(
+        &self,
+        data: &[*const u8],
+        results: &[u32],
+        categories: u32,
+    ) -> Result<u32, AclClassifyError> {
+        // `categories` must be in the closed range [1, MAX_CATEGORIES] and
+        // either 1 or a multiple of RESULTS_MULTIPLIER -- the same constraints
+        // applied by AclBuildConfig::new at build time.  We re-check here
+        // because the categories value at classify time is a separate,
+        // caller-supplied input that nothing else has constrained yet.
+        use super::config::{MAX_CATEGORIES, RESULTS_MULTIPLIER};
+        if categories == 0 {
+            error!("classify categories must be at least 1");
+            return Err(AclClassifyError::InvalidArgs);
+        }
+        if categories > MAX_CATEGORIES {
+            error!(
+                "classify categories {categories} exceeds RTE_ACL_MAX_CATEGORIES ({MAX_CATEGORIES})",
+            );
+            return Err(AclClassifyError::InvalidArgs);
+        }
+        if categories != 1 && !categories.is_multiple_of(RESULTS_MULTIPLIER) {
+            error!(
+                "classify categories {categories} must be 1 or a multiple of \
+                 RTE_ACL_RESULTS_MULTIPLIER ({RESULTS_MULTIPLIER})",
+            );
+            return Err(AclClassifyError::InvalidArgs);
+        }
+        // `categories` must not exceed the value supplied at build time.
+        // The trie's per-node result slots are sized to `num_categories`;
+        // passing `categories > num_categories` would make DPDK's classify
+        // loop read past those slots into adjacent trie memory.  DPDK does
+        // not validate this itself, so we close the hole here.  Passing
+        // `categories < num_categories` is permitted and just truncates
+        // the results (one valid use case is a multi-category build that
+        // a particular caller only wants the first category from).
+        let built_num_categories = self.state.config.num_categories();
+        if categories > built_num_categories {
+            error!(
+                "classify categories {categories} exceeds build-time num_categories ({built_num_categories})",
+            );
+            return Err(AclClassifyError::InvalidArgs);
+        }
+
+        // The number of input buffers must fit in u32.
+        let num: u32 = data.len().try_into().map_err(|_| {
+            error!("Input buffer count {} exceeds u32::MAX", data.len());
+            AclClassifyError::InvalidArgs
+        })?;
+
+        // The results slice must be large enough for `num * categories` entries.
+        let required = (num as usize)
+            .checked_mul(categories as usize)
+            .ok_or_else(|| {
+                error!("Overflow computing required results size: {num} * {categories}",);
+                AclClassifyError::InvalidArgs
+            })?;
+
+        if results.len() < required {
+            error!(
+                "Results slice too small: have {}, need {} ({num} buffers * {categories} categories)",
+                results.len(),
+                required,
+            );
+            return Err(AclClassifyError::InvalidArgs);
+        }
+
+        Ok(num)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// RAII: Drop
+// ---------------------------------------------------------------------------
+
+// Drop takes [`ACL_CREATE_LOCK`] before calling `rte_acl_free` (see the
+// comment on that static).  ACL contexts are expected to be long-lived
+// (created during setup, dropped at shutdown), so this serialisation has
+// no practical cost.  If a future caller drops `AclContext`s on a hot
+// path, the contention with concurrent `AclContext::new` and
+// `dump_all_contexts` calls becomes visible -- prefer to keep contexts
+// alive for their useful lifetime instead.
+//
+// Reentrancy invariant: the lock is a non-reentrant `Mutex<()>`, so an
+// `AclContext` must **not** be dropped on a thread that already holds
+// [`ACL_CREATE_LOCK`] -- doing so would deadlock the current thread on
+// its own previously-acquired guard.  In practice this can only happen
+// in pathological setups (e.g. a caller manually acquires the lock by
+// poking module-private state); the wrapper itself never holds the
+// lock across a region that could free an `AclContext`.
+impl<const N: usize, State> Drop for AclContext<N, State> {
+    fn drop(&mut self) {
+        debug!("Freeing ACL context '{}'", self.name());
+        // Serialize the rte_acl_free call against AclContext::new and
+        // dump_all_contexts via the same process-wide mutex (see
+        // [`ACL_CREATE_LOCK`]).  DPDK's `rte_acl_free` removes the
+        // context's entry from the global TAILQ; without this lock, an
+        // interleaving with a concurrent `find_existing`-then-`create` in
+        // another thread could observe a half-removed entry.
+        //
+        // The facade panics on poison.  Reaching this drop after another
+        // holder panicked mid-operation means the DPDK registry may be in an
+        // unknown state; aborting via the panic is the only safe answer.
+        let _guard = acl_create_lock().lock();
+        // SAFETY: rte_acl_free is safe to call on any valid context pointer; `Drop` runs at
+        // most once per `AclContext`, and the create-lock acquired above serialises against
+        // `rte_acl_create` / `dump_all_contexts`.
+        unsafe { dpdk_sys::rte_acl_free(self.ctx.as_ptr()) };
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Module-level utilities
+// ---------------------------------------------------------------------------
+
+/// Dump information about **all** ACL contexts to stdout.
+///
+/// This is a debugging aid that calls [`rte_acl_list_dump`][dpdk_sys::rte_acl_list_dump].
+/// Output goes directly to stdout and is not captured by the tracing subsystem.
+///
+/// # Thread safety
+///
+/// Holds the same process-wide ACL registry mutex used by
+/// [`AclContext::new`] and [`AclContext`] drops, so the list-walking
+/// inside `rte_acl_list_dump` does not race against concurrent registry
+/// mutation elsewhere in the process.
+#[cold]
+pub fn dump_all_contexts() {
+    // See the locking rationale on Drop / AclContext::new.  The dump walks
+    // DPDK's global TAILQ of contexts; concurrent registry mutation would
+    // expose a list in an inconsistent state to the walk.  Facade panics
+    // on poison (workspace policy -- a prior holder panic implies the
+    // registry may be inconsistent).
+    let _guard = acl_create_lock().lock();
+    // SAFETY: rte_acl_list_dump takes no arguments; the guard above keeps the list it walks stable.
+    unsafe { dpdk_sys::rte_acl_list_dump() }
+}
diff --git a/dpdk/src/acl/error.rs b/dpdk/src/acl/error.rs
new file mode 100644
index 0000000000..87c5de1cf2
--- /dev/null
+++ b/dpdk/src/acl/error.rs
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Error types for ACL operations.
+//!
+//! Each fallible ACL operation has a dedicated error type following the project's error handling
+//! guidelines.  Errors are strongly typed enums rather than strings or bare numeric codes.
+
+use errno::Errno;
+
+/// Ways in which an ACL context name can fail validation (see [`AclCreateError::InvalidName`]).
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, thiserror::Error)]
+pub enum InvalidAclName {
+    /// The name is not valid ASCII.
+    #[error("ACL context name must be valid ASCII")]
+    NotAscii,
+    /// The name is too long (exceeds [`RTE_ACL_NAMESIZE`][dpdk_sys::RTE_ACL_NAMESIZE]).
+    #[error("ACL context name is too long ({len} > {max} bytes)")]
+    TooLong {
+        /// The length of the name that was provided.
+        len: usize,
+        /// The maximum allowed length.
+        max: usize,
+    },
+    /// The name is empty.
+    #[error("ACL context name must not be empty")]
+    Empty,
+    /// The name contains one or more interior NUL (`\0`) bytes.
+    #[error("ACL context name must not contain null bytes")]
+    ContainsNullBytes,
+}
+
+/// Errors that can occur when creating an ACL context via [`rte_acl_create`][dpdk_sys::rte_acl_create].
+#[derive(Debug, thiserror::Error)]
+pub enum AclCreateError {
+    /// The context name failed validation.
+    #[error("Invalid ACL context name: {0}")]
+    InvalidName(#[from] InvalidAclName),
+    /// A context with this name already exists in DPDK's global registry.
+    ///
+    /// DPDK's [`rte_acl_create`][dpdk_sys::rte_acl_create] silently returns the
+    /// existing context for a duplicate name rather than failing.  Returning
+    /// that pointer wrapped in a new [`AclContext`][super::context::AclContext]
+    /// would create two owning wrappers for the same DPDK handle, leading to
+    /// use-after-free when the first one is dropped.  We refuse the call
+    /// instead.
+    ///
+    /// Detection is reliable against concurrent calls to
+    /// [`AclContext::new`][super::context::AclContext::new] within the same
+    /// process: a module-private mutex serializes the
+    /// `rte_acl_find_existing` + `rte_acl_create` pair.  Concurrent calls to
+    /// `rte_acl_create` from outside this wrapper (e.g. another C/C++
+    /// library linked into the same process) can still race.
+    ///
+    /// As a workspace-level invariant, **nothing else in this process is
+    /// permitted to call `rte_acl_create` / `rte_acl_free` directly**.  If
+    /// a future DPDK PMD or third-party library is added that touches the
+    /// global ACL registry, the wrapper's lock must be either lifted into
+    /// a coordination primitive that the new caller honours, or replaced
+    /// by a different scheme.  Start from
+    /// [`ACL_CREATE_LOCK`][super::context] when revisiting this.
+    #[error("An ACL context named '{name}' already exists")]
+    AlreadyExists {
+        /// The name that collided.
+        name: String,
+    },
+    /// DPDK returned `EINVAL` -- one or more parameters are invalid.
+    #[error("Invalid ACL creation parameters")]
+    InvalidParams,
+    /// DPDK returned `ENOMEM` -- insufficient memory to allocate the context.
+    #[error("Not enough memory to create ACL context")]
+    OutOfMemory,
+    /// DPDK set an `rte_errno` value that does not match any documented error for this call.
+    #[error("Unknown error creating ACL context: {0:?}")]
+    Unknown(Errno),
+}
+
+/// Errors that can occur when adding rules via [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules].
+#[derive(Debug, thiserror::Error)]
+pub enum AclAddRulesError {
+    /// The caller-supplied slice contains more than `u32::MAX` rules, which
+    /// cannot be represented in the DPDK FFI's `num` argument.  Distinct
+    /// from [`InvalidParams`][AclAddRulesError::InvalidParams] (which is
+    /// DPDK's own validation failure), this is a pre-flight length check
+    /// in the Rust wrapper.
+    #[error("Rule slice length {len} exceeds u32::MAX")]
+    TooManyRules {
+        /// The offending slice length.
+        len: usize,
+    },
+    /// A rule's [`AclField`] values are inconsistent with the
+    /// [`AclBuildConfig`] in effect.  Caught in the Rust wrapper before the
+    /// call would reach `rte_acl_add_rules`; see [`InvalidRule`] for the
+    /// per-violation details.
+    ///
+    /// [`AclField`]: super::rule::AclField
+    /// [`AclBuildConfig`]: super::config::AclBuildConfig
+    #[error("rule {rule_index} is invalid for the configured field layout: {source}")]
+    InvalidRule {
+        /// Position of the offending rule within the caller's slice.
+        rule_index: usize,
+        /// The specific violation.
+        #[source]
+        source: InvalidRule,
+    },
+    /// DPDK returned `ENOMEM` -- not enough space in the context for the new rules.
+    #[error("No space for additional rules in ACL context")]
+    OutOfMemory,
+    /// DPDK returned `EINVAL` -- one or more rule parameters are invalid.
+    #[error("Invalid rule parameters")]
+    InvalidParams,
+    /// DPDK returned an error code that is not documented for `rte_acl_add_rules`.
+    #[error("Unknown error adding rules: {0:?}")]
+    Unknown(Errno),
+}
+
+/// Per-rule validation failure, reported as the cause of
+/// [`AclAddRulesError::InvalidRule`].
+///
+/// Catching these in Rust (rather than relying on DPDK's later handling at
+/// build time) avoids reaching C code paths that would shift by an
+/// out-of-range amount, and surfaces rules DPDK would otherwise accept with
+/// a silently different meaning (empty range, narrowed category mask).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
+pub enum InvalidRule {
+    /// A [`FieldType::Mask`][super::field::FieldType::Mask] field's
+    /// `mask_range` (interpreted as a prefix length) exceeds the field's
+    /// bit width.  DPDK's `RTE_ACL_MASKLEN_TO_BITMASK` would perform a C
+    /// shift by an amount `>= 8 * size`, which is undefined behaviour.
+    #[error(
+        "Mask field at field_index {field_index}: prefix length \
+         {prefix_length} exceeds the field's bit width ({max_bits})"
+    )]
+    PrefixLengthOutOfRange {
+        /// The `field_index` of the offending field.
+        field_index: u8,
+        /// The caller-supplied prefix length.
+        prefix_length: u64,
+        /// `8 * size_bytes`.
+        max_bits: u32,
+    },
+    /// A [`FieldType::Range`][super::field::FieldType::Range] field has
+    /// `value > mask_range`.  DPDK interprets `value` as the inclusive low
+    /// bound and `mask_range` as the inclusive high bound, so the range
+    /// would be empty.
+    #[error(
+        "Range field at field_index {field_index}: low bound {low} \
+         exceeds high bound {high}"
+    )]
+    RangeReversed {
+        /// The `field_index` of the offending field.
+        field_index: u8,
+        /// The low bound (`value`).
+        low: u64,
+        /// The high bound (`mask_range`).
+        high: u64,
+    },
+    /// The rule's `category_mask` has bits set at positions
+    /// `>= config.num_categories()`.  DPDK silently masks out those bits
+    /// at build time, which would make the rule apply to fewer
+    /// categories than the caller intended.  Surfacing this at
+    /// `add_rules` time avoids the silent-narrowing footgun.
+    #[error(
+        "category_mask {category_mask:#010x} has bits set beyond \
+         num_categories ({num_categories}); offending bits: {extra_bits:#010x}"
+    )]
+    CategoryMaskExceedsNumCategories {
+        /// The rule's category mask.
+        category_mask: u32,
+        /// The build config's `num_categories`.
+        num_categories: u32,
+        /// `category_mask & !((1 << num_categories) - 1)`, the bits that
+        /// DPDK would mask off.
+        extra_bits: u32,
+    },
+}
+
+/// Errors that can occur when building the ACL context via [`rte_acl_build`][dpdk_sys::rte_acl_build].
+///
+/// Recovery: any of these variants is reported through
+/// [`AclBuildFailure`][super::context::AclBuildFailure], which carries the
+/// original [`AclContext`][super::context::AclContext] back to the caller in
+/// the [`Configuring`][super::context::Configuring] state.  The Rust typestate
+/// is reset (we did not call `rte_acl_build`'s success path), but the
+/// **DPDK-side rule list is left intact** -- previously-added rules remain
+/// loaded.  Callers who want a clean slate must call
+/// [`reset_rules`][super::context::AclContext::reset_rules] on the returned
+/// context before retrying.
+#[derive(Debug, thiserror::Error)]
+pub enum AclBuildError {
+    /// DPDK returned `ENOMEM` -- not enough memory to build the runtime structures.
+    #[error("Not enough memory to build ACL context")]
+    OutOfMemory,
+    /// DPDK returned `EINVAL` -- the build configuration is invalid.
+    #[error("Invalid ACL build configuration")]
+    InvalidConfig,
+    /// DPDK returned `ERANGE` -- the compiled runtime structures exceeded
+    /// [`AclBuildConfig::max_size`][super::config::AclBuildConfig::max_size].
+    /// Raise the limit or simplify the rule set, then retry on the
+    /// recovered context (see [`AclBuildFailure`][super::context::AclBuildFailure]).
+    #[error("ACL runtime structures exceeded the configured max_size")]
+    ExceededMaxSize,
+    /// DPDK returned an undocumented error code from `rte_acl_build`.
+    #[error("ACL build failed: {0:?}")]
+    Unknown(Errno),
+}
+
+/// Errors that can occur during classification via
+/// [`rte_acl_classify`][dpdk_sys::rte_acl_classify].
+#[derive(Debug, thiserror::Error)]
+pub enum AclClassifyError {
+    /// DPDK returned `EINVAL` -- the classify arguments are invalid.
+    ///
+    /// Common causes:
+    /// - `categories` is zero, greater than [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES],
+    ///   or neither `1` nor a multiple of [`RTE_ACL_RESULTS_MULTIPLIER`][dpdk_sys::RTE_ACL_RESULTS_MULTIPLIER].
+    /// - The `results` slice is too small for `num * categories` entries.
+    #[error("Invalid classify arguments")]
+    InvalidArgs,
+    /// DPDK returned `ENOTSUP` -- the requested classification algorithm
+    /// is not supported on this CPU.  Only reachable through
+    /// [`classify_with_algorithm`][super::context::AclContext::classify_with_algorithm];
+    /// the default-algorithm path uses the context's previously-set
+    /// algorithm, which has already been vetted by
+    /// [`set_default_algorithm`][super::context::AclContext::set_default_algorithm].
+    #[error("Requested classification algorithm is not supported on this CPU")]
+    NotSupported,
+    /// DPDK returned an undocumented error code.
+    #[error("Unknown error during classification: {0:?}")]
+    Unknown(Errno),
+}
+
+/// Errors that can occur when setting the classification algorithm via
+/// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify].
+#[derive(Debug, thiserror::Error)]
+pub enum AclSetAlgorithmError {
+    /// DPDK returned `EINVAL` -- the parameters are invalid.
+    #[error("Invalid algorithm or context")]
+    InvalidParams,
+    /// The requested algorithm is not supported on this platform.
+    #[error("Requested classification algorithm is not supported on this platform")]
+    NotSupported,
+    /// DPDK returned an undocumented error code.
+    #[error("Unknown error setting classification algorithm: {0:?}")]
+    Unknown(Errno),
+}
diff --git a/dpdk/src/acl/field.rs b/dpdk/src/acl/field.rs
new file mode 100644
index 0000000000..89d6847417
--- /dev/null
+++ b/dpdk/src/acl/field.rs
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! ACL field definition types.
+//!
+//! These types provide safe, strongly-typed wrappers around DPDK's [`rte_acl_field_def`] and the
+//! associated `RTE_ACL_FIELD_TYPE_*` constants.
+//!
+//! Using Rust enums for [`FieldType`] and [`FieldSize`] makes it impossible to construct an
+//! invalid field definition at the type level -- there is no representation for, say, a 3-byte
+//! field or an undefined comparison type.
+//!
+//! [`rte_acl_field_def`]: dpdk_sys::rte_acl_field_def
+
+use core::fmt::{Display, Formatter};
+
+/// The comparison semantics for an ACL field.
+///
+/// Each field in an ACL rule is compared against input data using one of three
+/// strategies.  The choice of strategy also determines how the `mask_range`
+/// value in [`AclField`][super::rule::AclField] is interpreted (see the
+/// constructor docs on [`AclField`][super::rule::AclField] for the
+/// type-vs-`mask_range` mapping).
+///
+/// Maps to the `RTE_ACL_FIELD_TYPE_*` constants.
+#[repr(u8)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum FieldType {
+    /// Prefix-length match.
+    ///
+    /// `mask_range` holds the **prefix length** -- the number of
+    /// most-significant bits to compare.  DPDK derives the bitmask internally
+    /// from the prefix length and the field size.
+    ///
+    /// Examples (for a 4-byte field):
+    /// - `32` -- exact match on all 32 bits.
+    /// - `24` -- IPv4 `/24` (compare the top 24 bits only).
+    /// - `0`  -- wildcard (matches anything).
+    ///
+    /// Corresponds to [`RTE_ACL_FIELD_TYPE_MASK`][dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_MASK].
+    Mask = 0,
+
+    /// Range match.
+    ///
+    /// The comparison is: `low <= input <= high`.  `value` is the low bound
+    /// and `mask_range` is the high bound.  Typically used for port ranges.
+    ///
+    /// Corresponds to [`RTE_ACL_FIELD_TYPE_RANGE`][dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_RANGE].
+    Range = 1,
+
+    /// Bitmask match.
+    ///
+    /// The comparison is: `(input & mask_range) == value`.  `mask_range`
+    /// holds the bitmask applied to the input before comparison with
+    /// `value`.  Typically used for flag-style fields (TCP flags, protocol
+    /// numbers with don't-care bits, etc.).
+    ///
+    /// Example: to match a TCP protocol number (`6`) exactly, use `value = 6`
+    /// and `mask_range = 0xFF`.
+    ///
+    /// Corresponds to [`RTE_ACL_FIELD_TYPE_BITMASK`][dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_BITMASK].
+    Bitmask = 2,
+}
+
+impl Display for FieldType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        match self {
+            FieldType::Mask => write!(f, "Mask"),
+            FieldType::Range => write!(f, "Range"),
+            FieldType::Bitmask => write!(f, "Bitmask"),
+        }
+    }
+}
+
+/// Valid byte widths for an ACL field.
+///
+/// DPDK restricts ACL field sizes to 1, 2, or 4 bytes per
+/// [`FieldDef`] within a single `input_index` group.  The C library also
+/// supports 8-byte logical fields by spanning two adjacent 4-byte groups,
+/// but the wrapper does not model that split-load behaviour, so
+/// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] rejects
+/// any layout that would have required it.  `FieldSize` therefore omits
+/// `Eight` to keep "constructible width" and "build-valid width" in sync.
+#[repr(u8)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum FieldSize {
+    /// 1 byte (e.g. IP protocol number).
+    One = 1,
+    /// 2 bytes (e.g. TCP/UDP port).
+    Two = 2,
+    /// 4 bytes (e.g. IPv4 address).
+    Four = 4,
+}
+
+impl Display for FieldSize {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{}", *self as u8)
+    }
+}
+
+/// Definition of a single field within an ACL rule.
+///
+/// This is the safe Rust equivalent of [`rte_acl_field_def`][dpdk_sys::rte_acl_field_def].
+/// A collection of field definitions describes the overall layout of rules and input data for an
+/// ACL context.
+///
+/// # Input grouping
+///
+/// For performance reasons the inner loop of the DPDK ACL search function is unrolled to process
+/// four input bytes at a time.  Fields must therefore be grouped into sets of 4 consecutive bytes
+/// via the [`input_index`][FieldDef::input_index] value.  The first input byte is processed as
+/// part of setup, so subsequent groups must be aligned to 4-byte boundaries.
+///
+/// See the [DPDK ACL documentation](https://doc.dpdk.org/guides/prog_guide/packet_classif_access_ctrl.html)
+/// for full details on input grouping rules.
+///
+/// # Why the fields are private
+///
+/// Fields are private so that callers cannot construct a `FieldDef` whose
+/// `field_index` would be out of range for the `N` used in the eventual
+/// [`AclBuildConfig<N>`][super::config::AclBuildConfig].  Construction goes
+/// through [`FieldDef::new`]; the array-level invariants (`field_index < N`,
+/// uniqueness, first-field-is-one-byte) are validated by
+/// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] when the
+/// definitions are assembled.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct FieldDef {
+    /// The comparison type for this field.
+    field_type: FieldType,
+    /// Width of the field in bytes.
+    size: FieldSize,
+    /// Zero-based index of this field within a rule (must be unique per rule layout and < N).
+    field_index: u8,
+    /// Input grouping index.
+    ///
+    /// Fields are processed in groups of 4 consecutive bytes.  All fields that share the same
+    /// `input_index` must fit within 4 bytes starting at the offset of the first field in the
+    /// group.
+    input_index: u8,
+    /// Byte offset of this field within the input data buffer.
+    offset: u32,
+}
+
+impl FieldDef {
+    /// Construct a field definition.
+    ///
+    /// The cross-field invariants (`field_index < N`, uniqueness within the
+    /// array, the first field being one byte wide) are validated by
+    /// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] when the
+    /// definitions are assembled into an array.  The DPDK 4-byte
+    /// `input_index` grouping rule is checked by DPDK itself at
+    /// `rte_acl_build` time.
+    #[must_use]
+    pub const fn new(
+        field_type: FieldType,
+        size: FieldSize,
+        field_index: u8,
+        input_index: u8,
+        offset: u32,
+    ) -> Self {
+        Self {
+            field_type,
+            size,
+            field_index,
+            input_index,
+            offset,
+        }
+    }
+
+    /// The comparison strategy for this field.
+    #[must_use]
+    pub const fn field_type(&self) -> FieldType {
+        self.field_type
+    }
+
+    /// The field width in bytes.
+    #[must_use]
+    pub const fn size(&self) -> FieldSize {
+        self.size
+    }
+
+    /// Zero-based index of this field within the rule layout.
+    #[must_use]
+    pub const fn field_index(&self) -> u8 {
+        self.field_index
+    }
+
+    /// The input grouping index.
+    #[must_use]
+    pub const fn input_index(&self) -> u8 {
+        self.input_index
+    }
+
+    /// Byte offset of this field within the input data buffer.
+    #[must_use]
+    pub const fn offset(&self) -> u32 {
+        self.offset
+    }
+}
+
+impl Display for FieldDef {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "FieldDef {{ type: {}, size: {}, field_index: {}, input_index: {}, offset: {} }}",
+            self.field_type, self.size, self.field_index, self.input_index, self.offset,
+        )
+    }
+}
+
+impl From<FieldDef> for dpdk_sys::rte_acl_field_def {
+    fn from(def: FieldDef) -> Self {
+        (&def).into()
+    }
+}
+
+impl From<&FieldDef> for dpdk_sys::rte_acl_field_def {
+    fn from(def: &FieldDef) -> Self {
+        dpdk_sys::rte_acl_field_def {
+            type_: def.field_type as u8,
+            size: def.size as u8,
+            field_index: def.field_index,
+            input_index: def.input_index,
+            offset: def.offset,
+        }
+    }
+}
+
+// Layout asserts for `rte_acl_field_def`.  The `From<&FieldDef>` impl
+// above produces an `rte_acl_field_def` value by struct-literal
+// composition (not by transmute), so a size/align mismatch with the
+// bindgen struct cannot cause UB on its own.  These asserts are a
+// canary: if DPDK ever changes the layout (added padding, reordered
+// fields, widened a type), the `[FieldDef; N] -> [rte_acl_field_def;
+// N]` conversion that `AclBuildConfig::to_raw` builds when populating
+// `rte_acl_config::defs` would silently produce wrong results.
+// Symmetric with the matching asserts on `RuleData` (rule.rs) and
+// `AclField` (rule.rs).
+const _: () = {
+    assert!(
+        core::mem::size_of::<dpdk_sys::rte_acl_field_def>() == 8,
+        "rte_acl_field_def size changed; recheck FieldDef -> rte_acl_field_def conversion"
+    );
+    assert!(
+        core::mem::align_of::<dpdk_sys::rte_acl_field_def>() == 4,
+        "rte_acl_field_def alignment changed; recheck FieldDef -> rte_acl_field_def conversion"
+    );
+};
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn field_type_discriminants_match_dpdk() {
+        assert_eq!(
+            FieldType::Mask as u8,
+            dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_MASK as u8
+        );
+        assert_eq!(
+            FieldType::Range as u8,
+            dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_RANGE as u8
+        );
+        assert_eq!(
+            FieldType::Bitmask as u8,
+            dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_BITMASK as u8
+        );
+    }
+
+    #[test]
+    fn field_def_converts_to_raw() {
+        let def = FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 2, 12);
+        let raw: dpdk_sys::rte_acl_field_def = def.into();
+        assert_eq!(raw.type_, 0);
+        assert_eq!(raw.size, 4);
+        assert_eq!(raw.field_index, 1);
+        assert_eq!(raw.input_index, 2);
+        assert_eq!(raw.offset, 12);
+    }
+
+    #[test]
+    fn field_def_ref_converts_to_raw() {
+        let def = FieldDef::new(FieldType::Range, FieldSize::Two, 3, 4, 20);
+        let raw: dpdk_sys::rte_acl_field_def = (&def).into();
+        assert_eq!(raw.type_, 1);
+        assert_eq!(raw.size, 2);
+        assert_eq!(raw.field_index, 3);
+        assert_eq!(raw.input_index, 4);
+        assert_eq!(raw.offset, 20);
+    }
+}
diff --git a/dpdk/src/acl/mod.rs b/dpdk/src/acl/mod.rs
new file mode 100644
index 0000000000..d920fe8eff
--- /dev/null
+++ b/dpdk/src/acl/mod.rs
@@ -0,0 +1,826 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Safe Rust abstraction over DPDK's ACL (Access Control List) library.
+//!
+//! This module provides a safe, idiomatic Rust interface to DPDK's packet classification engine.
+//! The ACL library builds an optimised trie from a set of rules and uses SIMD-accelerated search
+//! to classify input data buffers against those rules at high throughput.
+//!
+//! # Lifecycle
+//!
+//! The ACL context follows a **typestate** lifecycle enforced at compile time:
+//!
+//! ```mermaid
+//! stateDiagram-v2
+//!     [*] --> Configuring: new()
+//!     Configuring --> Configuring: add_rules(&mut)
+//!     Configuring --> Built: build()
+//!     Built --> Configuring: reset()
+//!     Built --> Built: classify(&) -> results
+//! ```
+//!
+//! - [`AclContext<N, Configuring>`][context::AclContext] -- accepts rule mutations via `&mut self`.
+//!   The Rust borrow checker enforces DPDK's documented constraint that rule addition and
+//!   compilation are **not thread-safe**.
+//!
+//! - [`AclContext<N, Built>`][context::AclContext] -- supports packet classification via `&self`.
+//!   Because classification is documented by DPDK as **thread-safe**, the `Sync` implementation
+//!   allows safe concurrent access from multiple threads (e.g. via `Arc`).
+//!
+//! # Type safety
+//!
+//! The const generic parameter `N` (number of fields per rule) is shared across
+//! [`AclContext`], [`Rule`], and [`AclBuildConfig`].  A field-count mismatch between any of
+//! these types is caught at compile time.
+//!
+//! # Byte order
+//!
+//! Rule field values must be in **host byte order** (the native endianness of the build target),
+//! while input data buffers passed to
+//! [`classify`][context::AclContext::classify] must be in **network byte order** (MSB).  DPDK
+//! handles the conversion internally during trie construction.
+//!
+//! The wrapper is developed and tested on little-endian targets (x86_64, aarch64).  Big-endian
+//! targets are not currently exercised; see [`Rule::validate`][rule::Rule::validate] for the
+//! soundness guards that catch the most common endian-related footgun.
+//!
+//! # `mask_range` interpretation
+//!
+//! The meaning of the `mask_range` value inside an [`AclField`] depends on the
+//! [`FieldType`]:
+//!
+//! | [`FieldType`]                          | `mask_range` meaning |
+//! |----------------------------------------|----------------------|
+//! | [`FieldType::Mask`]                    | **prefix length** -- number of most-significant bits to compare (e.g. `32` for exact match, `24` for `/24`) |
+//! | [`FieldType::Range`]                   | **upper bound** of the range (`value` is the lower bound) |
+//! | [`FieldType::Bitmask`]                 | **bitmask** applied to input before comparison |
+//!
+//! # Example
+//!
+//! ```no_run
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! use core::num::NonZero;
+//!
+//! use dataplane_dpdk::acl::*;
+//! use dataplane_dpdk::socket::SocketId;
+//!
+//! // Define a simple 5-tuple IPv4 ACL layout (5 fields).
+//! const NUM_FIELDS: usize = 5;
+//!
+//! let field_defs: [FieldDef; NUM_FIELDS] = [
+//!     FieldDef::new(FieldType::Bitmask, FieldSize::One,  0, 0, 0),
+//!     FieldDef::new(FieldType::Mask,    FieldSize::Four, 1, 1, 2),
+//!     FieldDef::new(FieldType::Mask,    FieldSize::Four, 2, 2, 6),
+//!     FieldDef::new(FieldType::Range,   FieldSize::Two,  3, 3, 10),
+//!     FieldDef::new(FieldType::Range,   FieldSize::Two,  4, 3, 12),
+//! ];
+//!
+//! // 1. Create a context (Configuring state).  The build config is
+//! //    supplied up front so that add_rules can validate each rule's
+//! //    field values against the layout.
+//! let params = AclCreateParams::<NUM_FIELDS>::new(
+//!     "my_acl",
+//!     SocketId::ANY,
+//!     NonZero::new(1024).unwrap(),
+//! )?;
+//! let build_cfg = AclBuildConfig::new(1, field_defs, 0)?;
+//! let mut ctx = AclContext::<NUM_FIELDS>::new(params, build_cfg)?;
+//!
+//! // 2. Add rules -- Rule<5> is enforced by the type system.
+//! let rule = Rule::new(
+//!     RuleData {
+//!         category_mask: CategoryMask::new(1)?,
+//!         priority: Priority::new(1)?,
+//!         userdata: NonZero::new(42).unwrap(),
+//!     },
+//!     [
+//!         AclField::from_u8(6, 0xFF),            // TCP protocol (bitmask)
+//!         AclField::from_u32(0xC0A80100, 24),    // 192.168.1.0/24  (prefix length)
+//!         AclField::from_u32(0x0A000100, 24),    // 10.0.1.0/24     (prefix length)
+//!         AclField::from_u16(0, u16::MAX),       // any src port    (range)
+//!         AclField::from_u16(80, 80),            // dst port 80     (range)
+//!     ],
+//! );
+//! ctx.add_rules(&[rule])?;
+//!
+//! // 3. Build (transitions Configuring -> Built; uses the config from new()).
+//! let ctx = ctx.build().map_err(|f| f.error)?;
+//!
+//! // 4. Classify packets (hot path, &self, thread-safe).
+//! //    `classify` is `unsafe`: each pointer in `packet_ptrs` must reference
+//! //    a buffer valid for at least `ctx.build_config().min_input_size()`
+//! //    bytes -- DPDK loads 4 bytes per `input_index` group, so the safety
+//! //    contract is wider than `max(offset + size)`.
+//! let packet_ptrs: Vec<*const u8> = Vec::new(); // populated by caller
+//! let mut results = vec![0u32; packet_ptrs.len()];
+//! unsafe { ctx.classify(&packet_ptrs, &mut results, 1)?; }
+//!
+//! // results[i] == 0  -> no match
+//! // results[i] == 42 -> matched our rule
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! # Modules
+//!
+//! | Module | Contents |
+//! |--------|----------|
+//! | [`classify`] | [`ClassifyAlgorithm`] -- SIMD backend selection |
+//! | [`config`]   | [`AclCreateParams`], [`AclBuildConfig`] -- validated configuration types |
+//! | [`context`]  | [`AclContext`] -- the typestate context (create, add, build, classify) |
+//! | [`error`]    | Dedicated error types for each fallible operation |
+//! | [`field`]    | [`FieldDef`], [`FieldType`], [`FieldSize`] -- rule field layout |
+//! | [`rule`]     | [`Rule`], [`RuleData`], [`AclField`] -- rule value types |
+
+pub mod classify;
+pub mod config;
+pub mod context;
+pub mod error;
+pub mod field;
+pub mod rule;
+
+// ---------------------------------------------------------------------------
+// Convenience re-exports
+// ---------------------------------------------------------------------------
+
+// Context & typestate markers
+pub use context::{AclBuildFailure, AclContext, Built, Configuring};
+
+// Configuration
+pub use config::{AclBuildConfig, AclCreateParams, InvalidAclBuildConfig};
+pub use config::{MAX_CATEGORIES, MAX_FIELDS, RESULTS_MULTIPLIER};
+
+// Rules & fields
+pub use field::{FieldDef, FieldSize, FieldType};
+pub use rule::{
+    AclField, CategoryMask, InvalidCategoryMask, InvalidPriority, Priority, Rule, RuleData,
+};
+
+// Classification algorithm
+pub use classify::{ClassifyAlgorithm, UnknownClassifyAlgorithm};
+
+// Errors
+pub use error::{
+    AclAddRulesError, AclBuildError, AclClassifyError, AclCreateError, AclSetAlgorithmError,
+    InvalidAclName, InvalidRule,
+};
+
+// Module-level utilities
+pub use context::dump_all_contexts;
+
+/// End-to-end integration tests for the ACL wrapper, exercising real
+/// `rte_acl_*` calls against a live EAL.
+///
+/// # EAL configuration (shared by every test in this module)
+///
+/// All tests initialize EAL via [`start_eal`][self::tests::start_eal], which
+/// passes a fixed set of flags plus two dynamic values:
+///
+/// - `--no-huge --in-memory` -- back EAL with anonymous memory instead of
+///   hugetlbfs.  Keeps the tests runnable on any host without manual hugepage
+///   configuration.
+/// - `--lcores 0@({allowed_cpus})` -- a single logical lcore (the main),
+///   floated across whatever physical CPUs `sched_getaffinity` reports as
+///   available to the process.  No workers means
+///   `rte_eal_mp_remote_launch` has no per-worker readiness flag to read, so
+///   we sidestep a benign-but-flagged data race that ThreadSanitizer reports
+///   against DPDK's lcore startup, and we also avoid spawning unused worker
+///   threads.  Floating (instead of pinning to physical CPU 0) keeps the
+///   tests honest about cgroups, taskset, and container CPU restrictions.
+/// - `--file-prefix <unique-id>` -- a per-init unique identifier so that
+///   concurrent forked test processes do not fight over the EAL runtime
+///   configuration namespace.  Necessary alongside `--in-memory` because EAL
+///   still creates per-process control state in the runtime dir.
+/// - `--no-pci --no-telemetry --no-shconf --no-hpet` -- disable everything we
+///   do not need so the tests start quickly and have no shared-config files
+///   to clean up.
+///
+/// # Running once per process
+///
+/// `eal::init` may only be called once per process.  Every test in this
+/// module funnels through the [`EAL`][self::tests::EAL] `OnceLock`, so
+/// the init happens exactly once regardless of how the harness schedules
+/// tests: nextest's per-test process fork (the workspace default) runs
+/// the lazy init once per fork; a single-process runner (`cargo test --
+/// --test-threads=1` or an in-process parallel harness) runs it once for
+/// the lifetime of the process.
+///
+/// # Running locally
+///
+/// ```text
+/// just setup-roots             # rebuild DPDK + wrapper
+/// # re-enter `nix-shell` so DATAPLANE_SYSROOT picks up the new sysroot
+/// cargo nextest run -p dataplane-dpdk acl::tests
+/// ```
+#[cfg(test)]
+mod tests {
+    use core::num::NonZero;
+
+    use concurrency::sync::OnceLock;
+
+    use crate::acl::*;
+    use crate::eal::Eal;
+    use crate::socket::SocketId;
+
+    /// Number of fields used by all lifecycle tests in this module.
+    const NUM_FIELDS: usize = 2;
+
+    /// Process-wide EAL initialized on first use, shared by every test.
+    ///
+    /// `eal::init` may only be called once per process.  Nextest's default
+    /// per-test process forking makes a per-test `init` trivially safe
+    /// (each forked process re-initializes EAL exactly once), but a
+    /// single-process test runner -- `cargo test -- --test-threads=1`, an
+    /// in-process parallel harness, or any future configuration that drops
+    /// the fork -- would call init twice and fail.  Funneling every test
+    /// through this lazy [`OnceLock`] makes the tests correct under both
+    /// modes: per-process forking initializes once per fork (cheap),
+    /// in-process initializes once for the lifetime of the process.
+    ///
+    /// The `Eal` value is intentionally leaked into the static for the
+    /// lifetime of the process; DPDK has no clean teardown path, and the
+    /// `Eal` Drop would (per [`crate::eal::init`]) be unable to free DPDK
+    /// allocations through the system allocator after the allocator swap.
+    static EAL: OnceLock<Eal> = OnceLock::new();
+
+    /// Lazily initialize EAL on first call.
+    ///
+    /// Each test calls this in place of `eal::init`; subsequent calls
+    /// return the shared `&'static Eal` without re-initializing DPDK.
+    fn start_eal() -> &'static Eal {
+        // DPDK pins lcores, but that is generally not what we actually want in a test environment.
+        // Instead, we need to allocate just lcore 0 (main) and pin it to "everything we legally have access to."
+        fn allowed_cpus() -> String {
+            use nix::sched::{CpuSet, sched_getaffinity};
+            use nix::unistd::Pid;
+            let set = sched_getaffinity(Pid::from_raw(0)).expect("sched_getaffinity");
+            (0..CpuSet::count())
+                .filter(|&i| set.is_set(i).unwrap_or(false))
+                .map(|x| x.to_string())
+                .collect::<Vec<_>>()
+                .join(",")
+        }
+        // concurrent executions of DPDK EAL can fight over allocations and file resources.
+        // You can prevent that with a unique prefix on the hugepage files it allocates (if any).
+        let eal_id = format!("{}", id::Id::<Eal>::new());
+        let core_pinning = format!("0@({})", allowed_cpus());
+        // EAL arguments used the first time EAL is initialized in this process.
+        let args: &[&str] = &[
+            "--no-huge",
+            "--no-pci",
+            "--in-memory",
+            "--no-telemetry",
+            "--no-shconf",
+            "--no-hpet",
+            "--iova-mode=va",
+            "--file-prefix",
+            &eal_id,
+            // Restrict EAL to a single lcore (the main).  Without workers,
+            // rte_eal_mp_remote_launch has no readiness flags to read and there is
+            // no DPDK-internal init race for ThreadSanitizer to flag.  Also avoids
+            // spawning unused worker threads.
+            //
+            // The `0@(<cpu-list>)` form means "logical lcore 0, floated across
+            // the listed physical CPUs": DPDK schedules lcore 0 onto any of
+            // them rather than pinning to a single CPU.  Floating instead of
+            // pinning keeps the tests honest about cgroups, taskset, and
+            // container affinity restrictions.
+            "--lcores",
+            &core_pinning,
+        ];
+
+        EAL.get_or_init(|| super::super::eal::init(args.iter().copied()))
+    }
+
+    /// Standard field layout used by the lifecycle tests.
+    ///
+    /// DPDK ACL requires the first field in the rule definition to be one byte
+    /// long (it is consumed during trie setup).  All subsequent fields must be
+    /// grouped into sets of 4 consecutive bytes via `input_index`.
+    fn standard_field_defs() -> [FieldDef; NUM_FIELDS] {
+        [
+            // Field 0: 1-byte entry at offset 0 (required by DPDK to be 1 byte).
+            FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0),
+            // Field 1: 4-byte Mask field at offset 4, input_index 1.
+            FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4),
+        ]
+    }
+
+    /// Build a rule that exact-matches the given 32-bit value in field 1.
+    ///
+    /// `userdata` becomes the classify result for matching inputs.
+    fn exact_match_rule(value: u32, userdata: u32) -> Rule<NUM_FIELDS> {
+        Rule::new(
+            RuleData {
+                category_mask: CategoryMask::new(1).unwrap(),
+                priority: Priority::new(1).unwrap(),
+                userdata: NonZero::new(userdata).expect("userdata must be non-zero"),
+            },
+            [
+                // Wildcard entry byte: field 0 is FieldType::Bitmask
+                // (per standard_field_defs).  mask = 0 makes the
+                // predicate `(input & 0) == 0`, which is trivially true
+                // for any input -- so this field matches any byte at
+                // offset 0.
+                AclField::from_u8(0, 0),
+                // Field 1 is FieldType::Mask; mask_range is interpreted
+                // as a prefix length, so 32 means "compare all 32 bits".
+                AclField::from_u32(value, 32),
+            ],
+        )
+    }
+
+    /// Build an 8-byte input buffer carrying `value` at offset 4 in network byte
+    /// order, suitable for the field layout returned by [`standard_field_defs`].
+    fn input_buffer(value: u32) -> [u8; 8] {
+        let mut buf = [0u8; 8];
+        buf[4..8].copy_from_slice(&value.to_be_bytes());
+        buf
+    }
+
+    /// Build the default `AclBuildConfig` used across the lifecycle tests
+    /// (`num_categories = 1`, the standard 2-field layout, no max_size).
+    fn standard_build_config() -> AclBuildConfig<NUM_FIELDS> {
+        AclBuildConfig::new(1, standard_field_defs(), 0).expect("build config")
+    }
+
+    /// End-to-end classify smoke test: build a tiny ACL context, run a real
+    /// `rte_acl_classify` call, and verify the match / no-match outcomes.
+    /// See the [module-level docs](self) for the EAL setup that applies to
+    /// every test here.
+    #[test]
+    fn classify_smoke() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "test_acl",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+
+        ctx.add_rules(&[exact_match_rule(0xDEAD_BEEF, 1)])
+            .expect("add rules");
+
+        let ctx = ctx.build().map_err(|f| f.error).expect("build");
+
+        let matching = input_buffer(0xDEAD_BEEF);
+        let non_matching = input_buffer(0);
+
+        let data_ptrs: Vec<*const u8> = vec![matching.as_ptr(), non_matching.as_ptr()];
+        let mut results = vec![0u32; 2];
+        // SAFETY: each buffer is 8 bytes.  DPDK reads up to 4 bytes per
+        // `input_index` group; this layout's groups start at offsets 0 and 4,
+        // so no read extends past the 8 readable bytes behind each pointer.
+        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify");
+
+        assert_eq!(results[0], 1, "expected match for 0xDEADBEEF");
+        assert_eq!(results[1], 0, "expected no match for 0x00000000");
+    }
+
+    /// Reset round-trip: build, classify, reset back to Configuring, swap
+    /// in a new rule, rebuild (no config supplied -- it lives on the
+    /// context), and verify the new rule's userdata wins while the old rule
+    /// no longer matches.  Also asserts the build config survives the reset.
+    #[test]
+    fn reset_round_trip() {
+        let _eal = start_eal();
+
+        let original_cfg = standard_build_config();
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "reset_round_trip",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, original_cfg.clone()).expect("new context");
+
+        // First build cycle: match 0xAAAAAAAA -> userdata 1.
+        ctx.add_rules(&[exact_match_rule(0xAAAA_AAAA, 1)])
+            .expect("add rules (first)");
+        let ctx = ctx.build().map_err(|f| f.error).expect("build (first)");
+        assert_eq!(
+            ctx.build_config(),
+            &original_cfg,
+            "Built context retains the config supplied to new()",
+        );
+
+        let first_input = input_buffer(0xAAAA_AAAA);
+        let data_ptrs: Vec<*const u8> = vec![first_input.as_ptr()];
+        let mut results = vec![0u32; 1];
+        // SAFETY: see classify_smoke -- same 8-byte buffer / 8-byte layout.
+        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify (first)");
+        assert_eq!(results[0], 1, "first build should match 0xAAAAAAAA");
+
+        // Reset back to Configuring (config carries through) and load a
+        // different rule; the first rule is deliberately not re-added.
+        let mut ctx = ctx.reset();
+        assert_eq!(
+            ctx.build_config(),
+            &original_cfg,
+            "reset() preserves the build config across Built -> Configuring",
+        );
+        ctx.add_rules(&[exact_match_rule(0xBBBB_BBBB, 2)])
+            .expect("add rules (second)");
+        let ctx = ctx.build().map_err(|f| f.error).expect("build (second)");
+
+        let second_input = input_buffer(0xBBBB_BBBB);
+        let stale_input = input_buffer(0xAAAA_AAAA);
+        let data_ptrs: Vec<*const u8> = vec![second_input.as_ptr(), stale_input.as_ptr()];
+        let mut results = vec![0u32; 2];
+        // SAFETY: see classify_smoke -- both buffers are 8 bytes and live here.
+        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify (second)");
+        assert_eq!(
+            results[0], 2,
+            "second build should match 0xBBBBBBBB with userdata 2"
+        );
+        assert_eq!(results[1], 0, "second build must not retain the first rule");
+    }
+
+    /// `add_rules` rejects a rule whose [`FieldType::Mask`] field carries a
+    /// prefix length larger than the field's bit width.  Without this
+    /// wrapper-side check, DPDK's `RTE_ACL_MASKLEN_TO_BITMASK` would
+    /// perform a C shift by an out-of-range amount (UB).
+    #[test]
+    fn add_rules_rejects_out_of_range_prefix_length() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "prefix_len_validate",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+
+        // Field 1 in standard_field_defs is a 4-byte Mask field, so the
+        // maximum legal prefix length is 32; 33 is the smallest illegal one.
+        let bad_rule: Rule<NUM_FIELDS> = Rule::new(
+            RuleData {
+                category_mask: CategoryMask::new(1).unwrap(),
+                priority: Priority::new(1).unwrap(),
+                userdata: NonZero::new(1).unwrap(),
+            },
+            [
+                AclField::from_u8(0, 0), // field 0: mask 0 matches any byte
+                AclField::from_u32(0, 33), // prefix_length = 33, max = 32
+            ],
+        );
+        let err = ctx
+            .add_rules(&[bad_rule])
+            .expect_err("out-of-range prefix length must be rejected");
+        assert!(
+            matches!(
+                err,
+                AclAddRulesError::InvalidRule {
+                    rule_index: 0, // the only rule in the submitted slice
+                    source: error::InvalidRule::PrefixLengthOutOfRange {
+                        prefix_length: 33,
+                        max_bits: 32,
+                        ..
+                    },
+                }
+            ),
+            "expected PrefixLengthOutOfRange, got {err:?}",
+        );
+    }
+
+    /// `set_default_algorithm` happy path: build, switch to a specific
+    /// algorithm, and classify.  Uses `Default` which is always supported.
+    #[test]
+    fn set_default_algorithm_then_classify() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "set_algo",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+        ctx.add_rules(&[exact_match_rule(0xCAFE_BABE, 7)])
+            .expect("add rules");
+        let mut ctx = ctx.build().map_err(|f| f.error).expect("build");
+
+        // `Default` is always available on any CPU DPDK runs on.
+        ctx.set_default_algorithm(ClassifyAlgorithm::Default)
+            .expect("set_default_algorithm");
+
+        let buf = input_buffer(0xCAFE_BABE);
+        let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()];
+        let mut results = vec![0u32; 1];
+        // SAFETY: see classify_smoke.
+        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify");
+        assert_eq!(results[0], 7, "expected userdata 7 for 0xCAFEBABE");
+    }
+
+    /// `classify` must reject `categories` values that would overflow DPDK's
+    /// per-thread runtime arrays sized to `RTE_ACL_MAX_CATEGORIES`, even when
+    /// the user's `results` slice is generous enough to satisfy the
+    /// per-element length check.
+    #[test]
+    fn classify_categories_validated_before_ffi() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "cat_validation",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+        ctx.add_rules(&[exact_match_rule(0xAAAA_AAAA, 1)])
+            .expect("add rules");
+        let ctx = ctx.build().map_err(|f| f.error).expect("build");
+
+        let buf = input_buffer(0xAAAA_AAAA);
+        let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()];
+
+        // results slice large enough to pass the length check, but categories
+        // out of range -- must still be rejected.
+        let mut results = vec![0u32; 64];
+
+        // categories = 0
+        // SAFETY: see classify_smoke.
+        let r = unsafe { ctx.classify(&data_ptrs, &mut results, 0) };
+        assert!(matches!(r, Err(AclClassifyError::InvalidArgs)), "categories = 0 not rejected");
+
+        // categories > MAX_CATEGORIES (= 16)
+        // SAFETY: see classify_smoke.
+        let r = unsafe { ctx.classify(&data_ptrs, &mut results, MAX_CATEGORIES + 1) };
+        assert!(matches!(r, Err(AclClassifyError::InvalidArgs)), "categories > MAX not rejected");
+
+        // categories > 1 but not a multiple of RESULTS_MULTIPLIER (= 4)
+        // SAFETY: see classify_smoke.
+        let r = unsafe { ctx.classify(&data_ptrs, &mut results, 3) };
+        assert!(matches!(r, Err(AclClassifyError::InvalidArgs)), "categories = 3 not rejected");
+    }
+
+    /// Creating a second [`AclContext`] with a name already registered in
+    /// DPDK's global ACL list must fail with [`AclCreateError::AlreadyExists`]
+    /// rather than silently aliasing the first context (which would
+    /// double-free on drop).  `_ctx_a` stays bound so the first context is live.
+    #[test]
+    fn duplicate_name_rejected() {
+        let _eal = start_eal();
+
+        let params_a = AclCreateParams::<NUM_FIELDS>::new(
+            "dup_name",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let _ctx_a =
+            AclContext::<NUM_FIELDS>::new(params_a, standard_build_config()).expect("first new");
+
+        let params_b = AclCreateParams::<NUM_FIELDS>::new(
+            "dup_name",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params (dup)");
+        let err = AclContext::<NUM_FIELDS>::new(params_b, standard_build_config())
+            .expect_err("second new with same name must fail");
+        assert!(
+            matches!(err, AclCreateError::AlreadyExists { ref name } if name == "dup_name"),
+            "expected AlreadyExists, got {err:?}",
+        );
+    }
+
+    /// Recovery after `add_rules` overflows `max_rule_num`: the context must
+    /// remain usable.  We submit one rule successfully, then submit more rules
+    /// than the remaining capacity allows, expect the error, and finally build
+    /// and classify against the first rule.
+    #[test]
+    fn add_rules_after_overflow_failure() {
+        let _eal = start_eal();
+
+        // `max_rule_num` of 1: a second add_rules call with any rule will
+        // overflow.
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "overflow_recover",
+            SocketId::ANY,
+            NonZero::new(1).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+
+        ctx.add_rules(&[exact_match_rule(0x1111_1111, 1)])
+            .expect("first add_rules should succeed");
+
+        // Attempting to add another rule must fail: capacity is exhausted.
+        // DPDK signals "no room left in the rule list" with -ENOMEM, which
+        // the wrapper maps to AclAddRulesError::OutOfMemory.  Pin the variant
+        // so a future change in mapping or DPDK's behaviour surfaces as a
+        // test failure rather than silently passing through.
+        let extra = exact_match_rule(0x2222_2222, 2);
+        let err = ctx
+            .add_rules(&[extra])
+            .expect_err("second add_rules should fail when over capacity");
+        assert!(
+            matches!(err, AclAddRulesError::OutOfMemory),
+            "expected OutOfMemory from capacity exhaustion, got {err:?}",
+        );
+
+        // Context must still be usable: build + classify against the first rule.
+        let ctx = ctx
+            .build()
+            .map_err(|f| f.error)
+            .expect("build after recoverable add_rules failure");
+
+        let buf = input_buffer(0x1111_1111);
+        let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()];
+        let mut results = vec![0u32; 1];
+        // SAFETY: see classify_smoke.
+        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify");
+        assert_eq!(results[0], 1, "first rule must still match after the failed add");
+    }
+
+    /// Build failure recovery: when `build()` fails, the wrapper returns
+    /// the original `Configuring` context inside `AclBuildFailure`.  The
+    /// caller must be able to keep using it (add rules, retry).  We force
+    /// the failure by calling `build()` with no rules added (DPDK rejects
+    /// `num_rules == 0` with `-EINVAL`).
+    #[test]
+    fn build_failure_returns_usable_context() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "build_failure_recovery",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+
+        // First build with zero rules must fail.
+        let failure = ctx.build().expect_err("build() with no rules must fail");
+        assert!(
+            matches!(failure.error, AclBuildError::InvalidConfig),
+            "expected InvalidConfig, got {:?}",
+            failure.error,
+        );
+
+        // Recover the context, add a rule, build again -- must succeed.
+        let mut ctx = failure.context;
+        ctx.add_rules(&[exact_match_rule(0xDEAD_BEEF, 1)])
+            .expect("add rules after recovery");
+        let ctx = ctx
+            .build()
+            .map_err(|f| f.error)
+            .expect("second build succeeds");
+
+        let buf = input_buffer(0xDEAD_BEEF);
+        let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()];
+        let mut results = vec![0u32; 1];
+        // SAFETY: see classify_smoke.
+        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify");
+        assert_eq!(results[0], 1, "expected match for 0xDEADBEEF after recovery");
+    }
+
+    /// `add_rules` rejects a rule whose `category_mask` has bits set at
+    /// positions `>= config.num_categories()`.  DPDK would silently mask
+    /// off those bits at build time, narrowing the rule's intended
+    /// category set; we surface this at `add_rules` time instead.
+    #[test]
+    fn add_rules_rejects_category_mask_beyond_num_categories() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "cat_mask_validate",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        // standard_build_config uses num_categories = 1, so only bit 0 is
+        // legal.  The rule built below sets bit 1 as well.
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+
+        let bad_rule: Rule<NUM_FIELDS> = Rule::new(
+            RuleData {
+                category_mask: CategoryMask::new(0b11).unwrap(), // bit 1 is out of range
+                priority: Priority::new(1).unwrap(),
+                userdata: NonZero::new(1).unwrap(),
+            },
+            [AclField::from_u8(0, 0), AclField::from_u32(0xAAAA_AAAA, 32)],
+        );
+        let err = ctx
+            .add_rules(&[bad_rule])
+            .expect_err("category_mask with bits beyond num_categories must be rejected");
+        assert!(
+            matches!(
+                err,
+                AclAddRulesError::InvalidRule {
+                    rule_index: 0,
+                    source: error::InvalidRule::CategoryMaskExceedsNumCategories {
+                        category_mask: 0b11,
+                        num_categories: 1,
+                        extra_bits: 0b10, // 0b11 with the one legal bit removed
+                    },
+                }
+            ),
+            "expected CategoryMaskExceedsNumCategories, got {err:?}",
+        );
+    }
+
+    /// Concurrent classify under `Arc<AclContext<N, Built<N>>>`: spawns
+    /// several worker threads, each calling
+    /// [`AclContext::classify`][crate::acl::AclContext::classify] in a
+    /// tight loop, and verifies every thread sees the correct match.
+    /// Exercises the per-state `Sync` impl on [`Built<N>`] and ensures
+    /// the wrapper's "share across classification threads" claim isn't
+    /// vacuous.  Runs `WORKERS` (4) threads for `ITERS_PER_WORKER` (1000)
+    /// iterations each to give the OS scheduler a chance to interleave.
+    #[test]
+    fn classify_concurrent_arc_shared() {
+        use std::sync::Arc;
+        use std::thread;
+
+        let _eal = start_eal();
+
+        const WORKERS: usize = 4;
+        const ITERS_PER_WORKER: usize = 1000;
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "classify_concurrent",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+        ctx.add_rules(&[exact_match_rule(0xDEAD_BEEF, 1)])
+            .expect("add rules");
+        let ctx: Arc<AclContext<NUM_FIELDS, Built<NUM_FIELDS>>> =
+            Arc::new(ctx.build().map_err(|f| f.error).expect("build"));
+
+        let handles: Vec<_> = (0..WORKERS)
+            .map(|worker| {
+                let ctx = Arc::clone(&ctx);
+                thread::spawn(move || {
+                    // Each worker owns its own buffers; the shared `ctx` is
+                    // the only state touched by more than one thread.
+                    let matching = input_buffer(0xDEAD_BEEF);
+                    let non_matching = input_buffer(0);
+                    for _ in 0..ITERS_PER_WORKER {
+                        let data_ptrs: Vec<*const u8> =
+                            vec![matching.as_ptr(), non_matching.as_ptr()];
+                        let mut results = vec![0u32; 2];
+                        // SAFETY: see classify_smoke.
+                        unsafe { ctx.classify(&data_ptrs, &mut results, 1) }
+                            .unwrap_or_else(|e| panic!("worker {worker}: classify failed: {e:?}"));
+                        assert_eq!(
+                            results[0], 1,
+                            "worker {worker}: expected match for 0xDEADBEEF",
+                        );
+                        assert_eq!(results[1], 0, "worker {worker}: expected no match for 0",);
+                    }
+                })
+            })
+            .collect();
+        for h in handles {
+            h.join().expect("worker thread panicked"); // surfaces worker assertion failures
+        }
+    }
+
+    /// `classify_with_algorithm` with a non-`Default` algorithm: locks in
+    /// the special-casing in [`AclContext::classify_with_algorithm`] by
+    /// dispatching through the `Scalar` variant (always available on every
+    /// CPU DPDK runs on) and verifying classification still works.
+    #[test]
+    fn classify_with_algorithm_scalar() {
+        let _eal = start_eal();
+
+        let params = AclCreateParams::<NUM_FIELDS>::new(
+            "classify_alg_scalar",
+            SocketId::ANY,
+            NonZero::new(16).unwrap(),
+        )
+        .expect("create params");
+        let mut ctx =
+            AclContext::<NUM_FIELDS>::new(params, standard_build_config()).expect("new context");
+        ctx.add_rules(&[exact_match_rule(0xFEED_FACE, 9)])
+            .expect("add rules");
+        let ctx = ctx.build().map_err(|f| f.error).expect("build");
+
+        let buf = input_buffer(0xFEED_FACE);
+        let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()];
+        let mut results = vec![0u32; 1];
+        // SAFETY: see classify_smoke.
+        unsafe {
+            ctx.classify_with_algorithm(&data_ptrs, &mut results, 1, ClassifyAlgorithm::Scalar)
+        }
+        .expect("classify_with_algorithm(Scalar)");
+        assert_eq!(results[0], 9, "expected userdata 9 for 0xFEEDFACE via Scalar");
+    }
+}
diff --git a/dpdk/src/acl/rule.rs b/dpdk/src/acl/rule.rs
new file mode 100644
index 0000000000..198914eea3
--- /dev/null
+++ b/dpdk/src/acl/rule.rs
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! ACL rule types.
+//!
+//! These types provide safe, `#[repr(C)]` wrappers around the DPDK ACL rule structures.
+//! The key types are:
+//!
+//! - [`RuleData`] -- rule metadata (category mask, priority, user data).
+//! - [`AclField`] -- a single field value with its mask or range bound.
+//! - [`Rule`]`<N>` -- a complete rule comprising [`RuleData`] followed by `N` [`AclField`] entries.
+//!
+//! # Layout guarantee
+//!
+//! [`Rule`]`<N>` is `#[repr(C)]` and has an identical memory layout to the struct produced by
+//! DPDK's `RTE_ACL_RULE_DEF(name, N)` C macro.  This means a `*const Rule<N>` can be safely cast
+//! to `*const rte_acl_rule` when calling [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules],
+//! provided the context was created with `rule_size = size_of::<Rule<N>>()`.
+//!
+//! # Byte order
+//!
+//! All fields in [`Rule`] structures are expected to be in **host byte order**, as documented by
+//! DPDK.  This is in contrast to the *input data buffers* passed to
+//! [`rte_acl_classify`][dpdk_sys::rte_acl_classify], which must be in **network byte order**
+//! (big-endian, most significant byte first).
+
+use core::fmt;
+use core::mem;
+use core::num::NonZero;
+
+// ---------------------------------------------------------------------------
+// Priority
+// ---------------------------------------------------------------------------
+
+/// DPDK ACL rule priority bounds.
+///
+/// Rule priorities must lie in the closed range \[`MIN`, `MAX`\].  (Separately, a
+/// classification result of `0` means "no match", so rule user data must be non-zero.)
+pub mod priority {
+    /// Minimum valid rule priority (inclusive).
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_MIN_PRIORITY`][dpdk_sys::_bindgen_ty_4::RTE_ACL_MIN_PRIORITY].
+    pub const MIN: i32 = dpdk_sys::_bindgen_ty_4::RTE_ACL_MIN_PRIORITY as i32;
+
+    /// Maximum valid rule priority (inclusive).
+    ///
+    /// Corresponds to
+    /// [`RTE_ACL_MAX_PRIORITY`][dpdk_sys::_bindgen_ty_4::RTE_ACL_MAX_PRIORITY].
+    pub const MAX: i32 = dpdk_sys::_bindgen_ty_4::RTE_ACL_MAX_PRIORITY as i32;
+}
+
+/// A validated ACL rule priority.
+///
+/// The inner [`NonZero<i32>`] is guaranteed to fall in the closed range
+/// \[[`priority::MIN`], [`priority::MAX`]\] (DPDK's `RTE_ACL_MIN_PRIORITY` is
+/// `1`, so zero is unreachable).  `#[repr(transparent)]` means this is
+/// layout-compatible with the underlying `i32` field of
+/// [`rte_acl_rule_data`][dpdk_sys::rte_acl_rule_data], and `Option<Priority>`
+/// is niche-optimised down to 4 bytes -- matching the
+/// [`userdata: NonZero<u32>`](RuleData) treatment.
+///
+/// Construct via [`new`][Priority::new], a `const fn` that returns a
+/// `Result`: callers handle it with `?`, `.unwrap()`, or a `match`.  The
+/// [`MIN`][Priority::MIN] and [`MAX`][Priority::MAX] constants are pre-validated
+/// shorthand for the range endpoints.
+#[repr(transparent)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Priority(NonZero<i32>);
+
+/// Error returned by [`Priority::new`] (and `Priority::try_from`) for an out-of-range value.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
+#[error(
+    "ACL priority {value} out of range [{}, {}]",
+    priority::MIN,
+    priority::MAX
+)]
+pub struct InvalidPriority {
+    /// The out-of-range value the caller supplied.
+    pub value: i32,
+}
+
+impl Priority {
+    // Both constants below are evaluated at compile time.  Matching on
+    // `NonZero::new` and `panic!`-ing in the `None` arm surfaces as a
+    // const-eval error (not a runtime panic) if the value happens to be
+    // zero -- which would itself be a compile-time bug.  This keeps the
+    // constants free of `unsafe`; Clippy's `useless_nonzero_new_unchecked`
+    // lint likewise steers const items away from `NonZero::new_unchecked`.
+
+    /// Smallest valid priority value (equal to [`priority::MIN`] = DPDK's
+    /// `RTE_ACL_MIN_PRIORITY`, currently `1`).
+    pub const MIN: Self = match NonZero::new(priority::MIN) {
+        Some(nz) => Self(nz),
+        // unreachable in const context: priority::MIN is a positive i32
+        // (verified at compile time); reaching this arm would be a
+        // compile error, not a runtime panic.
+        None => panic!("priority::MIN must be non-zero"),
+    };
+
+    /// Largest valid priority value (equal to [`priority::MAX`] = DPDK's
+    /// `RTE_ACL_MAX_PRIORITY`).
+    pub const MAX: Self = match NonZero::new(priority::MAX) {
+        Some(nz) => Self(nz),
+        None => panic!("priority::MAX must be non-zero"),
+    };
+
+    /// Construct a `Priority` from a raw value.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`InvalidPriority`] when `value` is outside
+    /// \[[`priority::MIN`], [`priority::MAX`]\].
+    pub const fn new(value: i32) -> Result<Self, InvalidPriority> {
+        if value < priority::MIN || value > priority::MAX {
+            return Err(InvalidPriority { value });
+        }
+        // priority::MIN == 1 (DPDK's RTE_ACL_MIN_PRIORITY), so the
+        // range check above guarantees value >= 1 and therefore != 0;
+        // the `unreachable!()` arm is dead.  Preferred over
+        // `unsafe { NonZero::new_unchecked }` so a wrong invariant
+        // faults loudly instead of being undefined behaviour.
+        match NonZero::new(value) {
+            Some(nz) => Ok(Self(nz)),
+            None => unreachable!(),
+        }
+    }
+
+    /// Get the raw `i32` (always within the validated range).
+    #[must_use]
+    pub const fn get(self) -> i32 {
+        self.0.get()
+    }
+}
+
+impl fmt::Display for Priority {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f) // explicit path: independent of traits in scope
+    }
+}
+
+impl TryFrom<i32> for Priority {
+    type Error = InvalidPriority;
+    fn try_from(value: i32) -> Result<Self, Self::Error> {
+        Self::new(value) // same range validation as the const constructor
+    }
+}
+
+// ---------------------------------------------------------------------------
+// CategoryMask
+// ---------------------------------------------------------------------------
+
+/// A validated category bitmask for an ACL rule.
+///
+/// Each bit corresponds to one category (bit `i` enables category `i`).  DPDK
+/// supports up to [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES]
+/// categories, so only the low `RTE_ACL_MAX_CATEGORIES` bits may be set.
+///
+/// `#[repr(transparent)]` and inner [`NonZero<u32>`] make `Option<CategoryMask>`
+/// niche-optimised to 4 bytes and rule out the zero-mask case (a rule with no
+/// categories enabled can never match).  The bit-range check enforces the
+/// type-level invariant that no out-of-range categories are referenced.
+///
+/// This newtype only enforces the upper bound common to all builds
+/// (`RTE_ACL_MAX_CATEGORIES`).  A context configured with fewer categories
+/// accepts fewer bits; that per-build limit is not checked by this type.
+#[repr(transparent)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct CategoryMask(NonZero<u32>);
+
+/// Error returned when [`CategoryMask::new`] is given an invalid bitmask.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
+pub enum InvalidCategoryMask {
+    /// The mask is zero -- the rule would match no category.
+    #[error("category mask is zero")]
+    Zero,
+    /// The mask has bits set at or above bit position `RTE_ACL_MAX_CATEGORIES`.
+    #[error(
+        "category mask {value:#010x} has bits set above bit {} \
+         (RTE_ACL_MAX_CATEGORIES = {})",
+        dpdk_sys::RTE_ACL_MAX_CATEGORIES - 1,
+        dpdk_sys::RTE_ACL_MAX_CATEGORIES
+    )]
+    OutOfRange {
+        /// The out-of-range value the caller supplied.
+        value: u32,
+    },
+}
+
+impl CategoryMask {
+    /// Union of every category bit DPDK supports: bits `0` through
+    /// `RTE_ACL_MAX_CATEGORIES - 1` inclusive.
+    pub const ALLOWED_BITS: u32 = {
+        let category_count = dpdk_sys::RTE_ACL_MAX_CATEGORIES;
+        // A full-width shift (`1u32 << 32`) overflows -- UB in C, a
+        // debug-panic in Rust -- so only shift when the count is below 32
+        // and saturate to all ones otherwise.
+        if category_count < 32 {
+            (1u32 << category_count) - 1
+        } else {
+            u32::MAX
+        }
+    };
+
+    /// Validate a raw `u32` and wrap it as a `CategoryMask`.
+    ///
+    /// # Errors
+    ///
+    /// - [`InvalidCategoryMask::Zero`] if `value == 0`.
+    /// - [`InvalidCategoryMask::OutOfRange`] if any bit above
+    ///   `RTE_ACL_MAX_CATEGORIES - 1` is set.
+    pub const fn new(value: u32) -> Result<Self, InvalidCategoryMask> {
+        // Bits of `value` that fall outside the supported category range.
+        let stray_bits = value & !Self::ALLOWED_BITS;
+        // `NonZero::new` doubles as the zero test, so every arm below is
+        // live: no dead `unreachable!()` branch and no
+        // `unsafe { NonZero::new_unchecked }`.  A zero `value` has no stray
+        // bits, so it is always reported as `Zero`, never `OutOfRange`.
+        match NonZero::new(value) {
+            None => Err(InvalidCategoryMask::Zero),
+            Some(_) if stray_bits != 0 => Err(InvalidCategoryMask::OutOfRange { value }),
+            Some(mask) => Ok(Self(mask)),
+        }
+    }
+
+    /// Unwrap to the underlying `u32` bitmask, which [`new`][Self::new]
+    /// guarantees is non-zero with no bit outside [`Self::ALLOWED_BITS`].
+    #[must_use]
+    pub const fn get(self) -> u32 {
+        let Self(bits) = self;
+        bits.get()
+    }
+}
+
+impl fmt::Display for CategoryMask {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:#010x}", self.0.get()) // "0x" followed by 8 zero-padded hex digits
+    }
+}
+
+impl TryFrom<u32> for CategoryMask {
+    type Error = InvalidCategoryMask;
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        Self::new(value) // same zero / bit-range validation as the const constructor
+    }
+}
+
+// ---------------------------------------------------------------------------
+// RuleData
+// ---------------------------------------------------------------------------
+
+/// Metadata associated with an ACL rule.
+///
+/// This is the safe Rust equivalent of [`rte_acl_rule_data`][dpdk_sys::rte_acl_rule_data] and has
+/// an identical `#[repr(C)]` memory layout.
+///
+/// # Important: `userdata` must be non-zero
+///
+/// DPDK uses `userdata == 0` as a sentinel meaning "no match", so a zero `userdata` could
+/// never be reported as matching.  The field is a [`NonZero<u32>`], which rules zero out.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct RuleData {
+    /// Bitmask of categories this rule applies to.
+    ///
+    /// Each bit corresponds to one category (bit `i` enables category `i`).
+    /// Validated at construction; see [`CategoryMask::new`].
+    pub category_mask: CategoryMask,
+
+    /// Rule priority.  Higher numeric value means higher priority.
+    ///
+    /// When multiple rules match a given input for the same category, the rule with the highest
+    /// priority wins.  Validated to be in the range
+    /// \[[`priority::MIN`], [`priority::MAX`]\] at construction; see [`Priority::new`].
+    pub priority: Priority,
+
+    /// Opaque value returned to the caller on match.
+    ///
+    /// **Non-zero by construction.**  A classification result of `0` indicates that no rule matched.
+    pub userdata: NonZero<u32>,
+}
+
+// Compile-time size and alignment checks against the raw DPDK type (offsets are not checked).
+const _: () = {
+    assert!(
+        mem::size_of::<RuleData>() == mem::size_of::<dpdk_sys::rte_acl_rule_data>(),
+        "RuleData size must match rte_acl_rule_data"
+    );
+    assert!(
+        mem::align_of::<RuleData>() == mem::align_of::<dpdk_sys::rte_acl_rule_data>(),
+        "RuleData alignment must match rte_acl_rule_data"
+    );
+};
+
+impl fmt::Display for RuleData {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "RuleData {{ category_mask: {}, priority: {}, userdata: {} }}",
+            self.category_mask, self.priority, self.userdata, // each via its own Display
+        )
+    }
+}
+
+// ---------------------------------------------------------------------------
+// AclField
+// ---------------------------------------------------------------------------
+
+/// A single field value within an ACL rule.
+///
+/// This is the safe Rust equivalent of [`rte_acl_field`][dpdk_sys::rte_acl_field] and has an
+/// identical `#[repr(C)]` memory layout.
+///
+/// The interpretation of the value and mask/range depends on the
+/// [`FieldType`][super::field::FieldType] specified in the corresponding
+/// [`FieldDef`][super::field::FieldDef]:
+///
+/// | [`FieldType`][super::field::FieldType] | value      | mask/range         |
+/// |----------------------------------------|------------|--------------------|
+/// | [`Mask`][super::field::FieldType::Mask]       | match value  | prefix length      |
+/// | [`Range`][super::field::FieldType::Range]     | range low    | range high         |
+/// | [`Bitmask`][super::field::FieldType::Bitmask] | match value  | bitmask            |
+///
+/// Use the [`from_u8`][AclField::from_u8], [`from_u16`][AclField::from_u16],
+/// [`from_u32`][AclField::from_u32], or [`from_u64_raw`][AclField::from_u64_raw] constructors to set
+/// the value and mask/range for the appropriate field width.
+///
+/// # Why the union fields are private
+///
+/// The two `rte_acl_field_types` unions are held in private fields so that safe
+/// code cannot construct an `AclField` with a narrow union member set and
+/// uninitialized upper bytes (e.g. `rte_acl_field_types { u8_: 5 }` leaves
+/// bytes 1..8 undefined).  Safe accessors read `u64_` and would observe those
+/// uninit bytes, which is undefined behavior.  Forcing construction through
+/// [`from_u8`][AclField::from_u8] / [`from_u16`][AclField::from_u16] /
+/// [`from_u32`][AclField::from_u32] / [`from_u64_raw`][AclField::from_u64_raw] (each
+/// of which initializes all 8 bytes, zeroing them before any narrow write)
+/// upholds the "all 8 bytes initialized" invariant that the union accessors
+/// rely on.
+///
+/// `AclField` is layout-compatible with [`rte_acl_field`][dpdk_sys::rte_acl_field] (verified by
+/// the const asserts below).  We keep the Rust newtype rather than re-exporting the bindgen
+/// struct so that we can attach typed constructors, safe accessors, and proper `Debug` /
+/// `Display` impls without leaking the `_bindgen_ty_*` union name into consumer code.
+// INVARIANT (union access on AclField).
+//
+// Every `AclField` reachable through this crate's safe API must have its
+// `value` and `mask_range` unions **fully initialized in all 8 bytes**.  All
+// constructors uphold this:
+//
+// * `Default::default()` -- explicit `u64_: 0` initializer per union
+//   (zeroes all 8 bytes; no `unsafe` needed).
+// * `from_u8` / `from_u16` / `from_u32` -- call `Self::default()` first
+//   (zeroing both unions) then overwrite a narrow member.
+// * `from_u64_raw` -- writes both unions with explicit `u64_` initializers.
+// * `zero()` -- delegates to `Default::default()`.
+//
+// Given this invariant, reading any union member (including the widest,
+// `u64_`) is sound: every member of `rte_acl_field_types` is an integer
+// type, so any bit pattern is a valid value.  Each `unsafe` block that
+// reads a union member cites this anchor as its SAFETY argument so that
+// removing one impl (e.g. `Debug`) does not orphan the invariant for the
+// others.
+//
+// The `mem::size_of::<rte_acl_field_types>() == 8` const-assert below is
+// the load-bearing check that "writing 8 bytes" covers the whole union;
+// a future bindgen change adding a member wider than 8 bytes trips it.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct AclField {
+    /// The match value (or range lower bound).  Private -- see the type-level
+    /// doc for why, and the INVARIANT comment above for the union-access
+    /// soundness argument.
+    value: dpdk_sys::rte_acl_field_types,
+    /// The mask, bitmask, or range upper bound (interpretation depends on the
+    /// field type).  Private -- see the type-level doc and the INVARIANT
+    /// comment above.
+    mask_range: dpdk_sys::rte_acl_field_types,
+}
+
+// Compile-time layout assertions against the raw DPDK types.
+//
+// The union-accessor soundness argument (every constructor writes 8 bytes;
+// every union member is an integer type) depends on the union being exactly
+// 8 bytes wide.  We assert that directly so a future bindgen change that
+// adds a wider member (e.g. `__m128`) trips here rather than silently making
+// the safe accessors unsound.  Only width is checked, not member types.
+const _: () = {
+    assert!(
+        mem::size_of::<dpdk_sys::rte_acl_field_types>() == 8,
+        "rte_acl_field_types union must be exactly 8 bytes for the \
+         'all 8 bytes initialized' invariant on AclField accessors"
+    );
+    assert!(
+        mem::size_of::<AclField>() == mem::size_of::<dpdk_sys::rte_acl_field>(),
+        "AclField size must match rte_acl_field"
+    );
+    assert!(
+        mem::align_of::<AclField>() == mem::align_of::<dpdk_sys::rte_acl_field>(),
+        "AclField alignment must match rte_acl_field"
+    );
+};
+
+impl Default for AclField {
+    /// Builds a field whose value and mask/range are both all-zero.
+    ///
+    /// On a [`Mask`][super::field::FieldType::Mask]-type field this acts as a wildcard: value
+    /// `0` with mask `0` matches any input.
+    fn default() -> Self {
+        // Initializing each union through its widest member (`u64_`)
+        // writes all 8 bytes, so no `unsafe` and no `mem::zeroed` are
+        // needed, and the "all 8 bytes initialised" invariant that the
+        // union accessors rely on holds from the start.
+        let zeroed = dpdk_sys::rte_acl_field_types { u64_: 0 };
+        Self {
+            value: zeroed,
+            mask_range: zeroed,
+        }
+    }
+}
+
+impl fmt::Debug for AclField {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // SAFETY: per the INVARIANT (union access on AclField) block above
+        // the struct definition, both unions are fully initialized.
+        let (value, mask) = unsafe { (self.value.u64_, self.mask_range.u64_) };
+        // Zero-padded hex keeps all 64 bits of each union visible.
+        let mut out = f.debug_struct("AclField");
+        out.field("value", &format_args!("{value:#018x}"));
+        out.field("mask_range", &format_args!("{mask:#018x}"));
+        out.finish()
+    }
+}
+
+impl fmt::Display for AclField {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Format choice: labeled `value=... mask_range=...` rather than
+        // `value/mask_range`.  The slash form reads like a CIDR prefix
+        // (`addr/len`), which fits only Mask-typed fields (`mask_range` is a
+        // prefix length); for Bitmask/Range it is a bitmask or an upper
+        // bound, so the slash form would mislead in two of three cases.
+        //
+        // SAFETY: see the INVARIANT (union access on AclField) block above
+        // the struct definition.
+        let (value, mask) = unsafe { (self.value.u64_, self.mask_range.u64_) };
+        f.write_fmt(format_args!("value={value:#018x} mask_range={mask:#018x}"))
+    }
+}
+
+impl PartialEq for AclField {
+    fn eq(&self, other: &Self) -> bool {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        let raw = |field: &Self| unsafe { (field.value.u64_, field.mask_range.u64_) };
+        // Equal iff both 64-bit union images agree.
+        raw(self) == raw(other)
+    }
+}
+
+// `Eq` cannot be derived because the underlying bindgen union does not implement `Eq`.
+// The manual impl is sound: `eq` compares plain `u64` views, and integer equality is reflexive.
+impl Eq for AclField {}
+
+impl core::hash::Hash for AclField {
+    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        let words = unsafe { [self.value.u64_, self.mask_range.u64_] };
+        // Feed the same two `u64` views that `PartialEq` compares, value first.
+        words.iter().for_each(|word| word.hash(state));
+    }
+}
+
+impl AclField {
+    /// Create a field from `u8` value and mask/range.
+    ///
+    /// Use this for fields declared with [`FieldSize::One`][super::field::FieldSize::One].
+    ///
+    /// The upper bytes of the underlying union are zeroed.
+    #[must_use]
+    pub fn from_u8(value: u8, mask_range: u8) -> Self {
+        // Zero-initialize first: all 8 bytes stay initialized and the upper bytes deterministic.
+        let mut field = Self::default();
+        field.value.u8_ = value;
+        field.mask_range.u8_ = mask_range;
+        field
+    }
+
+    /// Create a field from `u16` value and mask/range.
+    ///
+    /// Use this for fields declared with [`FieldSize::Two`][super::field::FieldSize::Two].
+    ///
+    /// The upper bytes of the underlying union are zeroed.
+    #[must_use]
+    pub fn from_u16(value: u16, mask_range: u16) -> Self {
+        let mut field = Self::default();
+        field.value.u16_ = value;
+        field.mask_range.u16_ = mask_range;
+        field
+    }
+
+    /// Create a field from `u32` value and mask/range.
+    ///
+    /// Use this for fields declared with [`FieldSize::Four`][super::field::FieldSize::Four].
+    ///
+    /// The upper bytes of the underlying union are zeroed.
+    #[must_use]
+    pub fn from_u32(value: u32, mask_range: u32) -> Self {
+        let mut field = Self::default();
+        field.value.u32_ = value;
+        field.mask_range.u32_ = mask_range;
+        field
+    }
+
+    /// Create a field from a raw `u64` value and mask/range, writing all
+    /// 8 bytes of each union member directly.
+    ///
+    /// The wrapper's [`FieldSize`][super::field::FieldSize] caps at 4
+    /// bytes, so a raw `u64` is wider than any declared field.
+    /// [`Rule::validate`] (run by
+    /// [`add_rules`][super::context::AclContext::add_rules]) inspects the
+    /// full `u64` only for the `mask_range` of `Mask`-typed fields, where
+    /// it rejects a prefix length above the field's bit width; stray upper
+    /// bits in `value`, or in Range/Bitmask fields, are not checked.  Prefer
+    /// [`from_u8`][AclField::from_u8] / [`from_u16`][AclField::from_u16] /
+    /// [`from_u32`][AclField::from_u32] for normal use; this constructor
+    /// exists for explicit bit-pattern composition (e.g. test fixtures
+    /// or low-level data interop).
+    #[must_use]
+    pub fn from_u64_raw(value: u64, mask_range: u64) -> Self {
+        Self {
+            value: dpdk_sys::rte_acl_field_types { u64_: value },
+            mask_range: dpdk_sys::rte_acl_field_types { u64_: mask_range },
+        }
+    }
+
+    /// Create a fully-zeroed field -- value `0` with mask/range `0`.
+    ///
+    /// Equivalent to [`AclField::default()`].
+    ///
+    /// # Important: this is **not** a universal wildcard
+    ///
+    /// Whether a zero field matches anything depends on the field's
+    /// [`FieldType`][super::field::FieldType] in the build config:
+    ///
+    /// - [`Mask`][super::field::FieldType::Mask] -- matches **anything**
+    ///   (`mask_range == 0` means "prefix length 0", i.e. compare zero bits).
+    /// - [`Range`][super::field::FieldType::Range] -- matches **only the
+    ///   value 0** (low and high bounds both 0).  For a range wildcard use
+    ///   [`from_u32`][AclField::from_u32]`(0, u32::MAX)` or the appropriate
+    ///   width.
+    /// - [`Bitmask`][super::field::FieldType::Bitmask] -- matches anything
+    ///   (predicate is `(input & 0) == 0`, which is trivially true), but
+    ///   you almost always want a non-zero mask in practice; reach for an
+    ///   explicit constructor instead.
+    #[must_use]
+    pub fn zero() -> Self {
+        Self::default()
+    }
+
+    /// Read the value as `u8`.
+    ///
+    /// Reading any integer-typed union member is sound for any [`AclField`]
+    /// constructed through this crate's public API.  The caller should still
+    /// ensure the field was constructed via [`from_u8`][AclField::from_u8] or
+    /// that the `u8` interpretation is meaningful in context; otherwise the
+    /// returned value is the low byte of whatever wider member was stored.
+    #[must_use]
+    pub fn value_u8(&self) -> u8 {
+        // SAFETY: see the INVARIANT (union access on AclField) block
+        // above the struct definition.  Every constructor leaves all 8
+        // bytes of each union initialized (an explicit `u64_` write,
+        // optionally followed by a narrow-member write), so reading any
+        // union member is defined behavior.
+        unsafe { self.value.u8_ }
+    }
+
+    /// Read the mask/range as `u8`.
+    ///
+    /// See [`value_u8`][AclField::value_u8] for the interpretation note.
+    #[must_use]
+    pub fn mask_range_u8(&self) -> u8 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.mask_range.u8_ }
+    }
+
+    /// Read the value as `u16`.
+    ///
+    /// See [`value_u8`][AclField::value_u8] for the interpretation note.
+    #[must_use]
+    pub fn value_u16(&self) -> u16 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.value.u16_ }
+    }
+
+    /// Read the mask/range as `u16`.
+    ///
+    /// See [`value_u8`][AclField::value_u8] for the interpretation note.
+    #[must_use]
+    pub fn mask_range_u16(&self) -> u16 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.mask_range.u16_ }
+    }
+
+    /// Read the value as `u32`.
+    ///
+    /// See [`value_u8`][AclField::value_u8] for the interpretation note.
+    #[must_use]
+    pub fn value_u32(&self) -> u32 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.value.u32_ }
+    }
+
+    /// Read the mask/range as `u32`.
+    ///
+    /// See [`value_u8`][AclField::value_u8] for the interpretation note.
+    #[must_use]
+    pub fn mask_range_u32(&self) -> u32 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.mask_range.u32_ }
+    }
+
+    /// Read the value as `u64`.
+    #[must_use]
+    pub fn value_u64(&self) -> u64 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.value.u64_ }
+    }
+
+    /// Read the mask/range as `u64`.
+    #[must_use]
+    pub fn mask_range_u64(&self) -> u64 {
+        // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition.
+        unsafe { self.mask_range.u64_ }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Rule<N>
+// ---------------------------------------------------------------------------
+
+/// A complete ACL rule with `N` fields.
+///
+/// This type is `#[repr(C)]` and has the same memory layout as the struct produced by the DPDK
+/// `RTE_ACL_RULE_DEF(name, N)` macro:
+///
+/// ```c
+/// struct name {
+///     struct rte_acl_rule_data data;
+///     struct rte_acl_field     field[N];
+/// };
+/// ```
+///
+/// Because of this layout guarantee, a `*const Rule<N>` can be cast to `*const rte_acl_rule` and
+/// passed directly to [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules], as long as the ACL
+/// context was created with `rule_size = core::mem::size_of::<Rule<N>>()`.
+///
+/// # Const parameter `N`
+///
+/// `N` is the number of fields in this rule and must match the number of
+/// [`FieldDef`][super::field::FieldDef] entries in the
+/// [`AclBuildConfig`][super::config::AclBuildConfig] used to build the
+/// [`AclContext`][super::context::AclContext].  Using the same const generic for both the context
+/// and its rules catches field-count mismatches at compile time.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Rule<const N: usize> {
+    /// Rule metadata: category mask, priority, and user data.
+    ///
+    /// Private so that constructing a `Rule<N>` must go through
+    /// [`Rule::new`], which forces the `N > 0` and layout compile-time
+    /// checks.  Access via [`Rule::data`] / [`Rule::data_mut`].
+    data: RuleData,
+    /// Field values (one per field definition in the ACL context).
+    ///
+    /// Private for the same reason as `data` -- see the doc above.  Access via
+    /// [`Rule::fields`] / [`Rule::fields_mut`].
+    fields: [AclField; N],
+}
+
+impl<const N: usize> Rule<N> {
+    /// Compile-time guard: a zero-field rule has nothing to match against
+    /// and DPDK would reject it at build time anyway.  Catch it earlier.
+    const _CHECK_N_NONZERO: () = assert!(N > 0, "Rule<N> requires N > 0");
+
+    /// Compile-time guard: `Rule<N>` must have exactly the layout produced by
+    /// the C macro `RTE_ACL_RULE_DEF(_, N)`: 12 bytes of `rte_acl_rule_data`
+    /// plus 4 bytes of padding (to reach 8-byte alignment of `rte_acl_field`)
+    /// plus `N * 16` bytes of fields.  Evaluated at compile time for every
+    /// concrete `N` that instantiates `new` (forced by its let-binding).
+    const _CHECK_LAYOUT: () = {
+        let expected = mem::size_of::<dpdk_sys::rte_acl_rule>()
+            + N * mem::size_of::<dpdk_sys::rte_acl_field>();
+        assert!(
+            mem::size_of::<Self>() == expected,
+            "Rule<N> layout must match RTE_ACL_RULE_DEF(_, N)"
+        );
+        assert!(
+            mem::align_of::<Self>() == mem::align_of::<dpdk_sys::rte_acl_rule>(),
+            "Rule<N> alignment must match rte_acl_rule"
+        );
+    };
+
+    /// Size of this rule type in bytes (`core::mem::size_of::<Rule<N>>()`), for passing as
+    /// `rule_size` when creating an ACL context.  Evaluating it fails to compile on `u32` overflow.
+    pub const RULE_SIZE: u32 = {
+        assert!(mem::size_of::<Self>() <= u32::MAX as usize, "Rule<N> size must fit in u32");
+        mem::size_of::<Self>() as u32
+    };
+
+    /// Create a new rule.
+    ///
+    /// # Arguments
+    ///
+    /// * `data` -- the rule metadata (category mask, priority, and user data).
+    /// * `fields` -- the field values for this rule; one entry per field definition.
+    #[must_use]
+    pub const fn new(data: RuleData, fields: [AclField; N]) -> Self {
+        // Force evaluation of the const checks at every instantiation of `new`.
+        let () = Self::_CHECK_N_NONZERO;
+        let () = Self::_CHECK_LAYOUT;
+        Self { data, fields }
+    }
+
+    /// Borrow the rule metadata.
+    #[must_use]
+    pub const fn data(&self) -> &RuleData {
+        &self.data
+    }
+
+    /// Mutable access to the rule metadata.
+    ///
+    /// Note: mutations made through this reference are not re-validated
+    /// until the [`Rule`] is handed to
+    /// [`AclContext::add_rules`][super::context::AclContext::add_rules],
+    /// which calls [`validate`][Rule::validate] before forwarding to
+    /// DPDK.  Any out-of-range mutation (e.g. setting `category_mask`
+    /// bits beyond `num_categories`) is caught at that point.
+    #[must_use]
+    pub const fn data_mut(&mut self) -> &mut RuleData {
+        &mut self.data
+    }
+
+    /// Borrow the field values.
+    #[must_use]
+    pub const fn fields(&self) -> &[AclField; N] {
+        &self.fields
+    }
+
+    /// Mutable access to the field values.
+    ///
+    /// See [`data_mut`][Rule::data_mut] for the re-validation note --
+    /// the same caveat applies: mutations are checked against the
+    /// build config at
+    /// [`add_rules`][super::context::AclContext::add_rules] time.
+    #[must_use]
+    pub const fn fields_mut(&mut self) -> &mut [AclField; N] {
+        &mut self.fields
+    }
+
+    /// Validate this rule's field values against the layout in
+    /// [`AclBuildConfig<N>`][super::config::AclBuildConfig].
+    ///
+    /// Run before each [`add_rules`][super::context::AclContext::add_rules]
+    /// call by the wrapper; exposed publicly so callers can pre-flight
+    /// rules in test fixtures or batch validators.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`InvalidRule`][super::error::InvalidRule] on the first
+    /// violation found.  Specifically catches:
+    ///
+    /// - **Soundness-critical:** a [`FieldType::Mask`][super::field::FieldType::Mask]
+    ///   field whose `mask_range` (prefix length) exceeds the field's bit width.
+    ///   DPDK would compute `RTE_ACL_MASKLEN_TO_BITMASK(prefix_len, size)`, which
+    ///   shifts by `8 * size - prefix_len` -- an out-of-range count, i.e. undefined behaviour in C.
+    /// - A [`FieldType::Range`][super::field::FieldType::Range] field with
+    ///   reversed low/high bounds.
+    /// - A `category_mask` with bits set at positions
+    ///   `>= config.num_categories()` (DPDK would silently mask them off).
+    ///
+    /// Each field is read through the union member that **DPDK** reads
+    /// for that field type:
+    ///
+    /// - [`FieldType::Mask`][super::field::FieldType::Mask]: `mask_range`
+    ///   is read via `u64_`, because DPDK feeds the entire 64-bit value
+    ///   to `RTE_ACL_MASKLEN_TO_BITMASK`.  Validating via the same view
+    ///   catches big-endian narrow writes (where `from_u8(_, 1)` lands
+    ///   at the MSB of the union and reads back as `1 << 56`, making the
+    ///   shift count invalid -- UB in C).  On little-endian targets the
+    ///   `u64_` view and the size-specific view agree.
+    /// - [`FieldType::Range`][super::field::FieldType::Range]: `value` /
+    ///   `mask_range` are read through the size-appropriate union member
+    ///   (`u8_` for `FieldSize::One`, `u16_` for `Two`, `u32_` for `Four`),
+    ///   because DPDK's range-trie generator reads the bounds byte-wise
+    ///   over `size` bytes.  Garbage bits in wider union members are
+    ///   ignored: DPDK never reads through them for a size-narrower field.
+    /// - [`FieldType::Bitmask`][super::field::FieldType::Bitmask]: not
+    ///   validated here.  DPDK reads the bitmask byte-wise over `size`
+    ///   bytes and an unsatisfiable `value & !mask_range != 0` predicate
+    ///   produces a dead rule, not UB.
+    pub fn validate(
+        &self,
+        config: &super::config::AclBuildConfig<N>,
+    ) -> Result<(), super::error::InvalidRule> {
+        use super::error::InvalidRule;
+        use super::field::FieldType;
+
+        // category_mask: any bit at position >= num_categories will be
+        // silently masked out by DPDK at build time.  Reject up-front so
+        // the rule's intended category set is what actually gets matched.
+        let num_categories = config.num_categories();
+        let category_mask = self.data.category_mask.get();
+        let allowed_categories: u32 = if num_categories >= 32 {
+            u32::MAX
+        } else {
+            (1u32 << num_categories) - 1
+        };
+        let extra_bits = category_mask & !allowed_categories;
+        if extra_bits != 0 {
+            return Err(InvalidRule::CategoryMaskExceedsNumCategories {
+                category_mask,
+                num_categories,
+                extra_bits,
+            });
+        }
+
+        for def in config.field_defs() {
+            // field_index < N is guaranteed by AclBuildConfig::new.
+            let field = &self.fields[def.field_index() as usize];
+            let size_bytes = def.size() as u8;
+            let max_bits = u32::from(size_bytes) * 8;
+
+            match def.field_type() {
+                FieldType::Mask => {
+                    // DPDK reads `mask_range.u64` for MASK fields and
+                    // feeds it to `RTE_ACL_MASKLEN_TO_BITMASK`, which
+                    // shifts `(uint64_t)-1` by `8 * size - prefix_length`.
+                    // We must validate against the same view DPDK will
+                    // see: on big-endian, a narrow constructor like
+                    // `from_u8(_, 1)` lands at the most-significant
+                    // byte of the union and reading `mask_range.u64`
+                    // yields `1 << 56`, far exceeding `max_bits` and
+                    // making the C shift undefined.  Validating via
+                    // `mask_range_u64` rejects that input up-front
+                    // with a clear error rather than silently passing
+                    // a UB-triggering value to DPDK.  On little-endian
+                    // (currently the only tested target) the u64 view
+                    // and the size-specific view agree, so this
+                    // changes nothing for LE callers.
+                    let prefix_length = field.mask_range_u64();
+                    if prefix_length > u64::from(max_bits) {
+                        return Err(InvalidRule::PrefixLengthOutOfRange {
+                            field_index: def.field_index(),
+                            prefix_length,
+                            max_bits,
+                        });
+                    }
+                }
+                FieldType::Range => {
+                    // DPDK reads RANGE bounds byte-wise over `size`
+                    // bytes (see `acl_gen_range_trie`), so the
+                    // size-matching union member is the right view
+                    // for the bounds-ordering check.
+                    let (value, mask_range): (u64, u64) = match def.size() {
+                        super::field::FieldSize::One => (
+                            u64::from(field.value_u8()),
+                            u64::from(field.mask_range_u8()),
+                        ),
+                        super::field::FieldSize::Two => (
+                            u64::from(field.value_u16()),
+                            u64::from(field.mask_range_u16()),
+                        ),
+                        super::field::FieldSize::Four => (
+                            u64::from(field.value_u32()),
+                            u64::from(field.mask_range_u32()),
+                        ),
+                    };
+                    if value > mask_range {
+                        return Err(InvalidRule::RangeReversed {
+                            field_index: def.field_index(),
+                            low: value,
+                            high: mask_range,
+                        });
+                    }
+                }
+                FieldType::Bitmask => {
+                    // No wrapper-side check.  DPDK reads BITMASK
+                    // value/mask_range byte-wise over `size` bytes
+                    // and ignores wider bytes.  A user-mistake like
+                    // `value & !mask_range != 0` (an unsatisfiable
+                    // bitmask predicate) is not UB; it just produces
+                    // a dead rule.  If a future lint pass surfaces
+                    // those, it belongs in a separate diagnostic
+                    // module, not in the soundness-critical
+                    // validator here.
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<const N: usize> fmt::Display for Rule<N> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Rule<{N}> {{ {}, fields: [", self.data)?;
+        self.fields.iter().enumerate().try_for_each(|(index, field)| {
+            if index != 0 {
+                write!(f, ", ")?; // separator precedes every field but the first
+            }
+            write!(f, "{field}")
+        })?;
+        write!(f, "] }}")
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Layout verification
+// ---------------------------------------------------------------------------
+
+/// Compile-time pins for the DPDK struct sizes that the [`Rule`]`<N>` layout
+/// formula depends on.
+///
+/// The field array in [`rte_acl_rule`][dpdk_sys::rte_acl_rule] starts at offset
+/// 16 (12 bytes of `rte_acl_rule_data` + 4 bytes of padding to reach 8-byte
+/// alignment of `rte_acl_field`).  The per-`N` layout invariant
+/// `size_of::<Rule<N>>() == size_of::<rte_acl_rule>() + N * size_of::<rte_acl_field>()`
+/// is checked for every instantiated `N` by [`Rule::_CHECK_LAYOUT`], not by
+/// spot checks here; this block only pins the constants that formula uses.
+const _: () = {
+    // rte_acl_rule_data is 12 bytes, alignment 4
+    assert!(mem::size_of::<dpdk_sys::rte_acl_rule_data>() == 12);
+    assert!(mem::align_of::<dpdk_sys::rte_acl_rule_data>() == 4);
+    // rte_acl_field is 16 bytes, alignment 8
+    assert!(mem::size_of::<dpdk_sys::rte_acl_field>() == 16);
+    assert!(mem::align_of::<dpdk_sys::rte_acl_field>() == 8);
+    // rte_acl_rule (with flexible array) is 16 bytes base, alignment 8
+    assert!(mem::size_of::<dpdk_sys::rte_acl_rule>() == 16);
+    assert!(mem::align_of::<dpdk_sys::rte_acl_rule>() == 8);
+};
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn rule_data_display() {
+        let data = RuleData {
+            category_mask: CategoryMask::new(0x1).unwrap(),
+            priority: Priority::new(100).unwrap(),
+            userdata: 42.try_into().unwrap(),
+        };
+        let s = format!("{data}");
+        assert!(s.contains("category_mask: 0x00000001"));
+        assert!(s.contains("priority: 100"));
+        assert!(s.contains("userdata: 42"));
+    }
+
+    // Tests below cross between union members (write narrow, read u64)
+    // and therefore observe the host's endianness.  Gated to LE because:
+    //
+    //   - On LE the narrow value lands in the low bytes of the union,
+    //     so reading u64 yields the same numeric value zero-extended.
+    //   - On BE the narrow value lands in the high bytes, so the
+    //     numeric u64 read would be `value << (64 - 8*size)`.
+    //
+    // For same-width use (input in host byte order, DPDK reading the
+    // matching union member) the data flow is endian-neutral.  The
+    // exception is Mask fields: `Rule::validate` reads `mask_range` as
+    // `u64`, so non-zero narrow-constructed prefix lengths fail on BE.
+    #[cfg(target_endian = "little")]
+    #[test]
+    fn acl_field_from_u8_zeroes_upper_bytes() {
+        let field = AclField::from_u8(0xAB, 0xCD);
+        assert_eq!(field.value_u64(), 0xAB);
+        assert_eq!(field.mask_range_u64(), 0xCD);
+    }
+
+    #[cfg(target_endian = "little")]
+    #[test]
+    fn acl_field_from_u16_zeroes_upper_bytes() {
+        let field = AclField::from_u16(0xABCD, 0x1234);
+        assert_eq!(field.value_u64(), 0xABCD);
+        assert_eq!(field.mask_range_u64(), 0x1234);
+    }
+
+    #[cfg(target_endian = "little")]
+    #[test]
+    fn acl_field_from_u32_zeroes_upper_bytes() {
+        let field = AclField::from_u32(0xDEAD_BEEF, 0xFFFF_FF00);
+        assert_eq!(field.value_u64(), 0xDEAD_BEEF);
+        assert_eq!(field.mask_range_u64(), 0xFFFF_FF00);
+    }
+
+    #[test]
+    fn acl_field_from_u64_raw_full_range() {
+        let field = AclField::from_u64_raw(0x0123_4567_89AB_CDEF, 0xFEDC_BA98_7654_3210);
+        assert_eq!(field.value_u64(), 0x0123_4567_89AB_CDEF);
+        assert_eq!(field.mask_range_u64(), 0xFEDC_BA98_7654_3210);
+    }
+
+    #[test]
+    fn acl_field_zero_is_all_zero() {
+        let w = AclField::zero();
+        assert_eq!(w.value_u64(), 0);
+        assert_eq!(w.mask_range_u64(), 0);
+    }
+
+    #[test]
+    fn acl_field_equality() {
+        let a = AclField::from_u32(10, 20);
+        let b = AclField::from_u32(10, 20);
+        let c = AclField::from_u32(10, 21);
+        assert_eq!(a, b);
+        assert_ne!(a, c);
+    }
+
+    #[cfg(target_endian = "little")]
+    #[test]
+    fn acl_field_debug_is_hex() {
+        // The hex digits depend on which bytes of the u64 the narrow
+        // u32 write lands in -- LE-specific.  See the note on
+        // `acl_field_from_u8_zeroes_upper_bytes`.
+        let field = AclField::from_u32(0xFF, 0xAA);
+        let dbg = format!("{field:?}");
+        assert!(dbg.contains("0x00000000000000ff"), "got: {dbg}");
+        assert!(dbg.contains("0x00000000000000aa"), "got: {dbg}");
+    }
+
+    #[test]
+    fn rule_display() {
+        let rule: Rule<2> = Rule::new(
+            RuleData {
+                category_mask: CategoryMask::new(1).unwrap(),
+                priority: Priority::new(10).unwrap(),
+                userdata: 1.try_into().unwrap(),
+            },
+            [AclField::from_u32(0, 0), AclField::from_u16(80, 80)],
+        );
+        let s = format!("{rule}");
+        assert!(s.starts_with("Rule<2>"));
+    }
+
+    #[test]
+    fn rule_equality() {
+        let r1: Rule<1> = Rule::new(
+            RuleData {
+                category_mask: CategoryMask::new(1).unwrap(),
+                priority: Priority::new(1).unwrap(),
+                userdata: 1.try_into().unwrap(),
+            },
+            [AclField::from_u32(100, 200)],
+        );
+        let r2 = r1;
+        assert_eq!(r1, r2);
+    }
+
+    #[test]
+    fn rule_size_constant_matches_size_of() {
+        assert_eq!(Rule::<1>::RULE_SIZE as usize, mem::size_of::<Rule<1>>());
+        assert_eq!(Rule::<5>::RULE_SIZE as usize, mem::size_of::<Rule<5>>());
+        assert_eq!(Rule::<10>::RULE_SIZE as usize, mem::size_of::<Rule<10>>());
+    }
+
+    #[test]
+    fn priority_constants_match_dpdk() {
+        assert_eq!(priority::MIN, 1);
+        assert_eq!(
+            priority::MAX,
+            dpdk_sys::_bindgen_ty_4::RTE_ACL_MAX_PRIORITY as i32
+        );
+    }
+
+    /// Property: `Priority::new` accepts exactly the closed interval
+    /// `[priority::MIN, priority::MAX]` and rejects everything else.
+    #[test]
+    fn priority_new_validates_range() {
+        bolero::check!().with_type::<i32>().for_each(|value: &i32| {
+            let result = Priority::new(*value);
+            if (priority::MIN..=priority::MAX).contains(value) {
+                let p = result.unwrap_or_else(|_| {
+                    panic!("Priority::new({value}) should accept in-range value")
+                });
+                assert_eq!(p.get(), *value);
+            } else {
+                assert!(
+                    result.is_err(),
+                    "Priority::new({value}) should reject out-of-range value"
+                );
+            }
+        });
+    }
+}
diff --git a/dpdk/src/eal.rs b/dpdk/src/eal.rs
index 65a450b94d..0e633aa1cc 100644
--- a/dpdk/src/eal.rs
+++ b/dpdk/src/eal.rs
@@ -8,10 +8,10 @@ use alloc::ffi::CString;
 use alloc::format;
 use alloc::string::ToString;
 use alloc::vec::Vec;
+use core::ffi::CStr;
 use core::ffi::c_int;
 use core::fmt::{Debug, Display};
 use dpdk_sys;
-use std::ffi::CStr;
 use tracing::{error, info, warn};
 
 /// Safe wrapper around the DPDK Environment Abstraction Layer (EAL).
@@ -83,7 +83,12 @@ impl ValidatedEalArgs {
     ) -> Result<ValidatedEalArgs, IllegalEalArguments> {
         let args: Vec<_> = args.into_iter().map(|s| s.as_ref().to_string()).collect();
         let len = args.len();
-        if len > c_int::MAX as usize {
+        // Reserve one slot for the argv[0] placeholder that `init` prepends
+        // before calling rte_eal_init.  Without this, len == c_int::MAX as
+        // usize would pass validation here and then overflow the i32 cast
+        // when computing argc for rte_eal_init.
+        const MAX_USER_ARGS: usize = (c_int::MAX as usize).saturating_sub(1);
+        if len > MAX_USER_ARGS {
             return Err(IllegalEalArguments::TooLong(len));
         }
         match args.iter().find(|s| !s.is_ascii()) {
@@ -109,7 +114,8 @@ impl ValidatedEalArgs {
 ///
 /// Panics if
 ///
-/// 1. There are more than `c_int::MAX` arguments.
+/// 1. There are more than `c_int::MAX - 1` arguments (the `-1` reserves a
+///    slot for the `argv[0]` placeholder).
 /// 2. The arguments are not valid ASCII strings.
 /// 3. The EAL initialization fails.
 /// 4. The EAL has already been initialized.
@@ -127,8 +133,64 @@ pub fn init(args: impl IntoIterator<Item = impl AsRef<str>>) -> Eal {
         let mut args = ValidatedEalArgs::new(args).unwrap_or_else(|e| {
             Eal::fatal_error(e.to_string());
         });
-        let mut c_args: Vec<_> = args.0.iter_mut().map(|s| s.as_ptr().cast_mut()).collect();
+        // EAL treats argv[0] as the program name and ignores it; this
+        // slot would otherwise eat the first real flag.  We sidestep
+        // this by prepending a placeholder program name as the first
+        // owned CString.
+        args.0.insert(0, c"dataplane".to_owned());
+
+        // Move every CString into a raw `*mut c_char` via
+        // `CString::into_raw`.  Giving up ownership this way yields a
+        // pointer with full mutable provenance for FFI, whereas `as_ptr()`
+        // on a `CString` (or `&CString` reborrowed from `&mut CString`)
+        // is derived from a shared borrow: under Stacked / Tree Borrows,
+        // any write through `as_ptr().cast_mut()` would be UB even though
+        // the allocation is writable.
+        //
+        // The pinned DPDK source (`rte_eal_init` + its getopt-based
+        // option parser) only permutes the argv **pointer array** --
+        // it does not modify the bytes of any individual argv string
+        // and does not change any string's NUL-terminated length.
+        // The `CString::from_raw` cleanup below depends on that:
+        // `from_raw` is only sound if the string length is unchanged
+        // from what `into_raw` produced.
+        //
+        // We still use `into_raw` (rather than `as_ptr().cast_mut()`)
+        // because `rte_eal_init`'s public contract permits the EAL or
+        // any argument parser it calls to modify argv strings in
+        // place (`setproctitle`-style program-name manipulation,
+        // `getopt_long`-style `optarg` rewrites).  Our pinned DPDK
+        // does not exercise that allowance, but `into_raw` gives us
+        // mut-clean pointer provenance regardless.  If a future DPDK
+        // upgrade ever started rewriting argv strings in place, the
+        // round-trip here is still pointer-provenance-sound but the
+        // reclamation path would need a non-length-dependent strategy
+        // (e.g. keep each string in an owned NUL-terminated `Vec<u8>` and
+        // pass `as_mut_ptr()`; `into_raw` pointers must never be `free`d).
+        //
+        // Reclamation note: `rte_eal_init` does getopt-style permutation
+        // on the argv array, so the order in `c_args` after the FFI
+        // call is **not** the order on entry.  We snapshot the
+        // pre-init pointer list in `original_ptrs` to reclaim each
+        // CString exactly once with `CString::from_raw`, regardless
+        // of how DPDK reorders `c_args`.  The `_reclaimed` Vec must
+        // be dropped no later than the end of this scope (and
+        // therefore before the `RteAllocator::mark_initialized`
+        // allocator swap below) so the system allocator that
+        // produced each CString is the one that frees it.
+        let mut c_args: Vec<*mut core::ffi::c_char> =
+            args.0.drain(..).map(CString::into_raw).collect();
+        let original_ptrs: Vec<*mut core::ffi::c_char> = c_args.clone();
         let ret = unsafe { dpdk_sys::rte_eal_init(c_args.len() as _, c_args.as_mut_ptr() as _) };
+        // SAFETY: each pointer in `original_ptrs` came from
+        // `CString::into_raw` above; we have not transferred ownership
+        // elsewhere (DPDK does not retain pointers from argv after
+        // `rte_eal_init` returns).  The pre-init snapshot holds each
+        // pointer exactly once, whatever DPDK did to `c_args`.
+        let _reclaimed: Vec<CString> = original_ptrs
+            .into_iter()
+            .map(|p| unsafe { CString::from_raw(p) })
+            .collect();
         if ret < 0 {
             EalErrno::assert(unsafe { dpdk_sys::rte_errno_get() });
         }
diff --git a/dpdk/src/lib.rs b/dpdk/src/lib.rs
index 4b06491c04..26ea73a048 100644
--- a/dpdk/src/lib.rs
+++ b/dpdk/src/lib.rs
@@ -33,6 +33,7 @@
 extern crate alloc;
 extern crate core;
 
+pub mod acl;
 pub mod dev;
 pub mod eal;
 pub mod flow;
diff --git a/justfile b/justfile
index 63450dc071..9b9d520dd9 100644
--- a/justfile
+++ b/justfile
@@ -52,17 +52,32 @@ _cargo_profile_flag := if profile == "debug" { "" } else { "--profile " + profil
 
 # filters for nextest
 #
-# Under `shuttle`, isolate the bolero x shuttle suite (a `shuttle`
-# substring matches both the test binary and the test-name
-# convention).
+# Under `shuttle`, the legacy `dataplane-quiescent` test layout had a
+# `shuttle` binary that hosted the bolero x shuttle suite, and we used
+# `--package=shuttle` (now an `-E 'package(shuttle)'`-style filter
+# embedded in nextest's argv) to isolate it.  Today that suite lives in
+# `concurrency/tests/quiescent_shuttle.rs`, and the test binary is
+# `quiescent_shuttle`; matching the substring `shuttle` is good enough.
 #
-# Under `loom`, the legacy filter `-E 'binary(loom)'` matched the
-# old `dataplane-quiescent` `loom` test binary.  After absorbing
-# the crate, the binary is `quiescent_loom` (and later test files
-# add more); an empty filter lets nextest walk every archived
-# binary.  Tests that don't apply under loom are cfg-gated out and
-# compile to zero entries.
-filter := if features == "shuttle" { "shuttle" } else { "" }
+# Under `loom`, the legacy filter `-E 'binary(loom)'` matched
+# `quiescent_loom`, the single integration-test binary that opted into
+# `loom::model`.  After the concurrency rework, loom-compatible tests are
+# spread across several binaries (`quiescent_model`, `thread_scope`,
+# `arc_weak`, `stress_dispatch`); the rest are gated with
+# `#![cfg(not(any(feature = "loom", ...)))]` and compile down to zero
+# tests under loom.  An empty filter is therefore the right answer:
+# nextest walks every archived binary, the cfg-gated ones contain no tests,
+# and the rest run their `#[concurrency::test]`-routed `loom::model` body.
+#
+# The `^shuttle` regex below matches every shuttle variant (`shuttle`,
+# `shuttle_pct`, `shuttle_dfs`).  Under any of them, `concurrency::sync`
+# types ARE shuttle primitives, and touching them outside a
+# `shuttle::check_*`-wrapped body panics with `ExecutionState NotSet`.
+# Tests designed for shuttle either go through `#[concurrency::test]` (which
+# emits a `concurrency_model::<backend>` leaf, so the substring matches)
+# or live in a `*_shuttle` module / `*shuttle*` binary by convention.
+# Other workspace tests would fail spuriously without this filter.
+filter := if features =~ "^shuttle" { "shuttle" } else { "" }
 
 # instrumentation mode (none/coverage)
 instrument := "none"
diff --git a/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h b/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h
index 3ebfe21e7d..a8b881dfc3 100644
--- a/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h
+++ b/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h
@@ -3,6 +3,7 @@
 
 #include <rte_config.h>
 
+#include <rte_acl.h>
 #include <rte_alarm.h>
 #include <rte_atomic.h>
 #include <rte_bitmap.h>
diff --git a/nix/pkgs/dpdk/default.nix b/nix/pkgs/dpdk/default.nix
index 5716bc3c74..c02a0b78b5 100644
--- a/nix/pkgs/dpdk/default.nix
+++ b/nix/pkgs/dpdk/default.nix
@@ -47,7 +47,6 @@ stdenv.mkDerivation {
   mesonFlags =
     let
       disabledLibs = [
-        "acl"
         "argparse"
         "bbdev"
         "bitratestats"
@@ -86,6 +85,7 @@ stdenv.mkDerivation {
         "table"
       ];
       enabledLibs = [
+        "acl"
         "cryptodev" # required for vhost
         "dmadev" # required by vhost
         "ethdev"