diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 436adba805..1465dd1fa8 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -144,8 +144,7 @@ jobs: profile: "debug" sanitize: "" instrument: "none" - - &release-build - name: "release" + - name: "release" profile: "release" sanitize: "" # TODO: enable cfi and safe-stack when possible instrument: "none" @@ -502,45 +501,71 @@ jobs: recipe_args: "${{ matrix.recipe.args }}" - *tmate - features: + concurrency: if: >- ${{ needs.check_changes.outputs.devfiles == 'true' || startsWith(github.event.ref, 'refs/tags/v') || github.event_name == 'workflow_dispatch' }} - name: "features/${{ matrix.build.name }}/${{ matrix.features }}" + name: "concurrency" runs-on: "lab" needs: - check_changes - check permissions: *check-perms - env: *check-env - strategy: - fail-fast: false - max-parallel: 1 - matrix: - include: - # The `loom` feature flips `concurrency::sync` to loom's - # primitives workspace-wide, which breaks crates that rely on - # `Weak`, `Arc::downgrade`, etc. (those aren't in - # `loom::sync`). Scope the loom build to only the concurrency - # package (which hosts the quiescent tests) so workspace - # feature unification doesn't poison unrelated crates. - - build: *release-build - features: "loom" - test_package: "concurrency" - - build: *release-build - features: "shuttle" - test_package: "" + # This job doesn't use the `*check-env` anchor: that anchor's + # `JUST_VARS` references `matrix.build.*`, and this job has no + # matrix. Each step below sets `JUST_VARS` itself, inlining the + # docker_sock / debug_justfile / oci_repo settings that the + # anchor would have provided. 
+ env: + USER: "runner" steps: - *checkout - *nix-setup - - name: "test/${{ matrix.features }}" + - name: "shuttle" + env: + JUST_VARS: >- + docker_sock=/run/docker/docker.sock + debug_justfile=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_justfile || false }} + profile=release + features=shuttle + oci_repo=ghcr.io + uses: *just + with: + recipe: "test" + - name: "shuttle_pct" + env: + JUST_VARS: >- + docker_sock=/run/docker/docker.sock + debug_justfile=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_justfile || false }} + profile=release + features=shuttle_pct + oci_repo=ghcr.io + uses: *just + with: + recipe: "test" + # The `loom` feature flips `concurrency::sync` to loom's + # primitives workspace-wide, which breaks crates that rely on + # `Weak`, `Arc::downgrade`, etc. (those aren't in + # `loom::sync`). Scope the loom build to only the concurrency + # package (which hosts the core concurrency tests) so workspace + # feature unification doesn't poison unrelated crates. 
+ # + # TODO: gate tests which can't be used with loom + - name: "loom" + env: + JUST_VARS: >- + docker_sock=/run/docker/docker.sock + debug_justfile=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_justfile || false }} + profile=release + features=loom + oci_repo=ghcr.io uses: *just with: recipe: "test" - recipe_args: "${{ matrix.test_package }}" + recipe_args: "concurrency" - *tmate vlab: @@ -633,7 +658,7 @@ jobs: needs: - check - sanitize - - features + - concurrency - build - vlab - test_each @@ -653,10 +678,10 @@ jobs: run: | echo '::error:: Some check job(s) failed' exit 1 - - name: "Flag any features matrix failures" - if: ${{ needs.features.result != 'success' && needs.features.result != 'skipped' }} + - name: "Flag any concurrency job failures" + if: ${{ needs.concurrency.result != 'success' && needs.concurrency.result != 'skipped' }} run: | - echo '::error:: Some features job(s) failed' + echo '::error:: concurrency job failed' exit 1 - name: "Flag any test_each matrix failures" if: ${{ needs.test_each.result != 'success' && needs.test_each.result != 'skipped' }} diff --git a/Cargo.lock b/Cargo.lock index 0cb905debb..515ab160b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -513,7 +513,7 @@ version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a21a3b022507b9edd2050caf370d945e398c1a7c8455531220fa3968c45d29e" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 2.0.0", "proc-macro2", "quote", "syn 2.0.117", @@ -1252,6 +1252,7 @@ dependencies = [ name = "dataplane-concurrency-macros" version = "0.21.0" dependencies = [ + "proc-macro-crate 3.5.0", "proc-macro2", "quote", "syn 2.0.117", @@ -1285,10 +1286,14 @@ dependencies = [ name = "dataplane-dpdk" version = "0.21.0" dependencies = [ + "bolero", + "dataplane-concurrency", "dataplane-dpdk-sys", "dataplane-dpdk-sysroot-helper", "dataplane-errno", + "dataplane-id", "dataplane-net", + "nix 0.31.3", "serde", "thiserror", "tracing", @@ -4319,6 
+4324,15 @@ dependencies = [ "toml_edit 0.20.7", ] +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.11+spec-1.1.0", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -5729,7 +5743,7 @@ checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.11", "toml_edit 0.22.27", ] @@ -5742,6 +5756,15 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + [[package]] name = "toml_edit" version = "0.20.7" @@ -5749,7 +5772,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" dependencies = [ "indexmap 2.14.0", - "toml_datetime", + "toml_datetime 0.6.11", "winnow 0.5.40", ] @@ -5762,11 +5785,32 @@ dependencies = [ "indexmap 2.14.0", "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.11", "toml_write", "winnow 0.7.15", ] +[[package]] +name = "toml_edit" +version = "0.25.11+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +dependencies = [ + "indexmap 2.14.0", + "toml_datetime 1.1.1+spec-1.1.0", + "toml_parser", + "winnow 1.0.3", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.3", +] + [[package]] name = "toml_write" version = "0.1.2" @@ 
-6385,6 +6429,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winnow" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/concurrency-macros/Cargo.toml b/concurrency-macros/Cargo.toml index cbd7b35dd1..4a415d3b01 100644 --- a/concurrency-macros/Cargo.toml +++ b/concurrency-macros/Cargo.toml @@ -14,6 +14,12 @@ shuttle = [] silence_clippy = [] [dependencies] +# `proc-macro-crate` resolves the consumer's actual import name for +# `dataplane-concurrency`. This crate is publishable, so the `test` +# macro cannot assume a fixed `::concurrency` alias -- workspace +# consumers often rename it, external users typically don't. See +# `pub fn test` for how the resolution feeds into the emitted path. +proc-macro-crate = { workspace = true, default-features = true } proc-macro2 = { workspace = true, default-features = true } quote = { workspace = true, default-features = true } syn = { workspace = true, default-features = true, features = ["full"] } diff --git a/concurrency-macros/src/lib.rs b/concurrency-macros/src/lib.rs index 0239477e10..8d2ee227eb 100644 --- a/concurrency-macros/src/lib.rs +++ b/concurrency-macros/src/lib.rs @@ -2,13 +2,45 @@ // Copyright Open Network Fabric Authors use proc_macro::TokenStream; +use proc_macro_crate::{FoundCrate, crate_name}; +use proc_macro2::{Span, TokenStream as TokenStream2}; use quote::quote; use syn::{ - Ident, Item, + Ident, Item, ItemFn, parse::{Parse, ParseStream}, parse_macro_input, }; +/// Resolve a path prefix for `dataplane-concurrency` in the consumer's +/// `Cargo.toml`. Returns a token stream that resolves to the crate root, +/// so callers can append `::stress` or `::with_loom` etc. +/// +/// * Workspace consumer with `concurrency = { package = "dataplane-concurrency", ... 
}` +/// in its `Cargo.toml`: returns `::concurrency`. +/// * External consumer with `dataplane-concurrency = "..."` directly: +/// returns `::dataplane_concurrency`. +/// * `dataplane-concurrency`'s own integration tests: returns +/// `::dataplane_concurrency` (which requires the test file to do +/// `extern crate dataplane_concurrency;` -- cargo doesn't let a crate +/// list itself as a regular dev-dep, but `extern crate` works in the +/// integration test). +fn concurrency_crate_path() -> TokenStream2 { + match crate_name("dataplane-concurrency") { + Ok(FoundCrate::Itself) => { + let ident = Ident::new("dataplane_concurrency", Span::call_site()); + quote! { ::#ident } + } + Ok(FoundCrate::Name(name)) => { + let ident = Ident::new(&name, Span::call_site()); + quote! { ::#ident } + } + Err(_) => { + let ident = Ident::new("dataplane_concurrency", Span::call_site()); + quote! { ::#ident } + } + } +} + struct ConcurrencyModeArgs { mode: Ident, } @@ -38,20 +70,21 @@ pub fn concurrency_mode(attr: TokenStream, item: TokenStream) -> TokenStream { let item = parse_macro_input!(item as Item); let mode = args.mode.to_string(); + let krate = concurrency_crate_path(); let output = match mode.as_str() { "shuttle" => quote! { - ::concurrency::with_shuttle! { + #krate::with_shuttle! { #item } }, "loom" => quote! { - ::concurrency::with_loom! { + #krate::with_loom! { #item } }, "std" => quote! { - ::concurrency::with_std! { + #krate::with_std! { #item } }, @@ -67,3 +100,143 @@ pub fn concurrency_mode(attr: TokenStream, item: TokenStream) -> TokenStream { output.into() } + +/// Mark a function as a test that runs under whichever concurrency backend +/// is currently selected on `dataplane-concurrency`. +/// +/// Under the default (production) backend, expands to a flat +/// `#[test] fn <name>() { concurrency::stress(|| { original }) }`, +/// which calls the body once. 
+/// +/// Under any model-checker backend (`loom`, `shuttle`, `shuttle_pct`, +/// `shuttle_dfs`), expands to a nested module so the test's binary +/// path identifies the active backend in nextest reports / JUnit +/// output: +/// +/// ```text +/// // #[concurrency::test] fn some_test() { body } +/// // under `--features loom`: +/// mod some_test { +/// mod concurrency_model { +/// #[test] +/// fn loom() { concurrency::stress(|| body) } +/// } +/// } +/// ``` +/// +/// The same shape applies for `shuttle` / `shuttle_pct` / `shuttle_dfs`, +/// each writing the function name that names the active backend. +/// Nextest filters like `-E 'test(/concurrency_model::loom$/)'` then +/// pick out the loom-backed runs cleanly without having to grep on +/// binary names. +/// +/// # Example +/// +/// ```ignore +/// #[concurrency::test] +/// fn snapshot_observes_a_legal_value() { +/// // ... body uses concurrency::sync, concurrency::thread ... +/// } +/// ``` +/// +/// The function must take no arguments and return `()`. The body is +/// captured as a closure, so it must be `Fn() + Send + Sync + 'static` +/// (no borrows of locals, no `FnOnce`-only constructs). This matches +/// what `loom::model` and `shuttle::check_*` require. +/// +/// # Limitations +/// +/// * **Single-threaded bodies fail under `shuttle_pct`.** Shuttle's PCT +/// scheduler panics at runtime if the test closure does not exercise +/// any concurrent atomic / thread operation (no `thread::spawn`, no +/// contended `Mutex`/`Arc`). The detection is dynamic, so the macro +/// cannot reject these statically; if you need such a test, gate it +/// with `#[cfg(not(feature = "shuttle_pct"))]` or use a regular +/// `#[test]` for the default-only smoke check. +/// * **Async bodies and arguments are rejected at parse time** with a +/// clear compile error. 
+#[proc_macro_attribute] +pub fn test(_attr: TokenStream, item: TokenStream) -> TokenStream { + let func = parse_macro_input!(item as ItemFn); + + let attrs = &func.attrs; + let vis = &func.vis; + let sig = &func.sig; + let block = &func.block; + let fn_name = &sig.ident; + + if let Some(asyncness) = sig.asyncness { + return syn::Error::new_spanned( + asyncness, + "#[concurrency::test] does not support async functions yet", + ) + .to_compile_error() + .into(); + } + if !sig.inputs.is_empty() { + return syn::Error::new_spanned( + &sig.inputs, + "#[concurrency::test] functions must take no arguments", + ) + .to_compile_error() + .into(); + } + + let krate = concurrency_crate_path(); + // Default backend: flat `#[test] fn <name>() { ... }`. No nested + // module wrapping -- the production code path runs the body once, + // and there is no second backend to disambiguate from. + // + // Model-checker backends: emit `mod <name> { mod concurrency_model + // { #[test] fn <backend>() { ... } } }`. The leaf function name + // identifies the active backend, so a nextest report shows entries + // like `some_test::concurrency_model::loom` and a filter like + // `-E 'test(/concurrency_model::loom$/)'` picks them out + // unambiguously. + quote! 
{ + #[cfg(not(any(feature = "loom", feature = "shuttle")))] + #[::core::prelude::v1::test] + #(#attrs)* + #vis #sig { + #krate::stress(|| #block); + } + + #[cfg(any(feature = "loom", feature = "shuttle"))] + #[allow(non_snake_case)] + mod #fn_name { + use super::*; + mod concurrency_model { + use super::*; + + #[cfg(feature = "loom")] + #[::core::prelude::v1::test] + #(#attrs)* + fn loom() { + #krate::stress(|| #block); + } + + #[cfg(all(feature = "shuttle", not(feature = "shuttle_pct")))] + #[::core::prelude::v1::test] + #(#attrs)* + fn shuttle() { + #krate::stress(|| #block); + } + + #[cfg(all(feature = "shuttle_pct", not(feature = "shuttle_dfs")))] + #[::core::prelude::v1::test] + #(#attrs)* + fn shuttle_pct() { + #krate::stress(|| #block); + } + + #[cfg(feature = "shuttle_dfs")] + #[::core::prelude::v1::test] + #(#attrs)* + fn shuttle_dfs() { + #krate::stress(|| #block); + } + } + } + } + .into() +} diff --git a/concurrency/Cargo.toml b/concurrency/Cargo.toml index 0ef625da08..3cd3e072dd 100644 --- a/concurrency/Cargo.toml +++ b/concurrency/Cargo.toml @@ -14,15 +14,20 @@ parking_lot = ["dep:parking_lot"] # * `shuttle` -- shuttle with the random scheduler (the default # for first-time users -- you almost always want # this one). -# * `shuttle_pct` -- shuttle with the PCT scheduler. Use when you +# * `shuttle_pct` -- shuttle with the PCT scheduler. Use when you # want to bias toward rare interleavings. -# * `shuttle_dfs` -- shuttle with the DFS scheduler. Use for +# * `shuttle_dfs` -- shuttle with the DFS scheduler. Use for # exhaustive small-state exploration. # -# Arranged as a chain (`shuttle_dfs` -> `shuttle_pct` -> `shuttle`) -# so a single `feature = "shuttle"` cfg check is true under every -# variant. The scheduler is selected at runtime by -# `concurrency::stress` (added in a later PR of this stack). +# All three share the same `dep:shuttle` machinery; only the scheduler +# selected at runtime differs. 
See `concurrency::stress` for the +# dispatch and the `#[concurrency::test]` attribute macro for the +# write-once-run-everywhere wrapper. +# +# The features form a chain (`shuttle_dfs` -> `shuttle_pct` -> `shuttle`) +# so that any `feature = "shuttle"` cfg check is true under all three +# variants. cfg_select-style precedence (most-specific first) still +# picks the right scheduler. See `concurrency::stress`. shuttle = ["dep:shuttle", "concurrency-macros/shuttle"] shuttle_pct = ["shuttle"] shuttle_dfs = ["shuttle_pct"] diff --git a/concurrency/QUIESCENT.md b/concurrency/QUIESCENT.md index 8e7cd7f271..e12898be30 100644 --- a/concurrency/QUIESCENT.md +++ b/concurrency/QUIESCENT.md @@ -24,7 +24,7 @@ graph TD ## Quick start ```rust,ignore -use dataplane_quiescent::channel; +use dataplane_concurrency::quiescent::channel; #[derive(Debug)] struct MyConfig { /* ... */ } @@ -142,7 +142,7 @@ profile: Construction: ```rust,ignore -use dataplane_quiescent::channel; +use dataplane_concurrency::quiescent::channel; let publisher = channel(initial_value); ``` @@ -354,7 +354,7 @@ the `Publisher`. This makes the destructor-thread-affinity guarantee a In practice: ```rust,ignore -use dataplane_quiescent::channel; +use dataplane_concurrency::quiescent::channel; let publisher = channel(initial); diff --git a/concurrency/src/lib.rs b/concurrency/src/lib.rs index 10415eb60a..91035d61e0 100644 --- a/concurrency/src/lib.rs +++ b/concurrency/src/lib.rs @@ -1,6 +1,64 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright Open Network Fabric Authors +//! Backend-routed concurrency primitives for the dataplane workspace. +//! +//! Re-exports a uniform `parking_lot`-shaped surface that compiles +//! unchanged under the production backend, `loom`, and `shuttle*`. +//! `#[concurrency::test]` + `concurrency::stress` let a single source +//! file exercise either the production code path or the model-checker +//! of choice. +//! +//! # "Compiles under loom" != "exhaustively checked under loom" +//! 
+//! Several documented shim limitations let code keep compiling +//! against the facade without being meaningfully model-checked for +//! the schedules that matter. Authors writing new model-check +//! coverage should be aware of the gaps: +//! +//! * **`Weak` under loom** holds a strong clone of the inner +//! `Arc` (loom 0.7 ships no `Weak` of its own), so +//! `Weak::upgrade` *always* returns `Some` after a successful +//! `Arc::downgrade`. The race a loom test would want to expose -- +//! "the last `Arc` dropped between my `Weak::upgrade` check and my +//! use" -- is unreachable. Code that depends on the +//! upgrade-fails-after-last-strong-drop semantics needs a different +//! testing strategy (real OS threads + tsan, or a hand-rolled +//! model). Concrete workspace consequence: NAT's allocator/port- +//! forwarder paths use `Weak::upgrade().is_none()` as the liveness +//! signal for cleanup (see `nat/src/stateful/apalloc/alloc.rs` and +//! `port_alloc.rs`); under loom that signal never fires, so those +//! paths are *not* exercised. NAT is not in the loom test matrix +//! today, which is consistent with that limit; do not add it +//! without first reworking the Weak usage or extending the shim. +//! * **`RwLock::upgradable_read` under loom/shuttle** is implemented +//! on top of an exclusive `write()`. Sound -- no schedule +//! `parking_lot` allows is forbidden here -- but lossy: the model +//! checker never explores the many-readers-plus-one-upgradable +//! schedule that `parking_lot` permits. Tests that hinge on that +//! interleaving need `RwLock` with explicit `read()` then +//! `write()`, or a richer state machine in the facade. +//! * **`static FOO: Mutex = Mutex::new(...)` does not compile +//! under loom.** `loom::sync::Mutex::new` is plain `fn`, not +//! `const fn`, so a static initialiser fails to typecheck. Use +//! `OnceLock` for the static (the facade re-exports +//! `std::sync::OnceLock` under all backends) or move the +//! 
construction into a runtime initialiser gated by +//! `#[concurrency_mode(std)]`. +//! * **`OnceLock` under loom/shuttle** is the real `std::sync::OnceLock`, +//! not a model-aware shim. Loom and shuttle do not see the +//! atomics inside `OnceLock::get_or_init`, so tests whose +//! correctness depends on the *ordering* of a once-initialised +//! publication are not covered. `OnceLock` is sound here for the +//! "compute lazily once" pattern; the publish-ordering story +//! needs a separate `AtomicX` + `Acquire/Release` pair that the +//! model checker *can* trace. +//! +//! The `_strict_provenance` feature forces the `Mutex>` +//! fallback slot even under the default backend; the CI miri matrix +//! exercises both `ArcSwap` (production) and that fallback to widen +//! coverage. + #![deny( unsafe_code, missing_docs, @@ -13,38 +71,23 @@ #![allow(missing_docs)] pub mod macros; +mod stress; pub mod sync; +pub mod thread; + +// `stress` is `pub` so the expansion of `#[concurrency::test]` in +// downstream crates can name it. It is not part of the recommended +// public surface; the macro is. `#[doc(hidden)]` keeps the symbol +// off rustdoc, leaving users to land on `#[concurrency::test]`. 
+#[doc(hidden)] +pub use stress::stress; #[cfg(all(miri, any(feature = "shuttle", feature = "loom")))] compile_error!("miri does not meaningfully support 'loom' or 'shuttle'"); -#[cfg(not(any(feature = "loom", feature = "shuttle")))] -pub use std::thread; - -#[cfg(all( - feature = "loom", - not(feature = "shuttle"), - not(feature = "silence_clippy") -))] -pub use loom::thread; - -#[cfg(all( - feature = "shuttle", - not(feature = "loom"), - not(feature = "silence_clippy") -))] -pub use shuttle::thread; - #[cfg(all(feature = "shuttle", feature = "loom", not(feature = "silence_clippy")))] compile_error!("Cannot enable both 'loom' and 'shuttle' features at the same time"); -////////////////////// -// This is a workaround to silence clippy warnings when both loom and shuttle -// features are enabled in the clippy checks which uses --all-features. -#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] -pub use std::thread; -////////////////////// - #[cfg(all(feature = "silence_clippy", not(feature = "shuttle")))] compile_error!("silence_clippy manually enabled, should only be enabled by --all-features"); diff --git a/concurrency/src/macros.rs b/concurrency/src/macros.rs index 6dbe7a5658..63e88ef4f6 100644 --- a/concurrency/src/macros.rs +++ b/concurrency/src/macros.rs @@ -160,4 +160,4 @@ macro_rules! with_std { ($($item:item)*) => {}; } -pub use concurrency_macros::concurrency_mode; +pub use concurrency_macros::{concurrency_mode, test}; diff --git a/concurrency/src/quiescent.rs b/concurrency/src/quiescent.rs index ef21ca0d56..81ed7514a8 100644 --- a/concurrency/src/quiescent.rs +++ b/concurrency/src/quiescent.rs @@ -196,18 +196,17 @@ impl Version { // LCOV_EXCL_START - reaching this path is itself the failure; // chasing coverage of it is absurd. See the comment below. core::hint::cold_path(); - #[allow(clippy::panic)] - { - // This whole path is technically reachable, but only technically. 
- // If you got config updates 1B times per second on average it would - // still take 584 years to wrap around. Even that requires us to receive - // and process config updates faster than the line rate of an 800Gb/s NIC. - // For hundreds of years. With no reboot. - // - // The only realistic way to reach this point is via a bug in this code, - // not via normal operation. - panic!("Version wrapped! This is a bug"); - } + // This whole path is technically reachable, but only technically. + // If you got config updates 1B times per second on average it would + // still take 584 years to wrap around. Even that requires us to receive + // and process config updates faster than the line rate of an 800Gb/s NIC. + // For hundreds of years. With no reboot. + // + // The only realistic way to reach this point is via a bug in this code, + // not via normal operation -- this is exactly what `unreachable!()` is + // for per development/programming-rules/error-handling.md. The + // formatting variant is non-const, so we use the bare form. + unreachable!() // LCOV_EXCL_STOP } } diff --git a/concurrency/src/slot.rs b/concurrency/src/slot.rs index 4b42d55868..291fbd846b 100644 --- a/concurrency/src/slot.rs +++ b/concurrency/src/slot.rs @@ -15,6 +15,17 @@ //! protocol -- atomic publish, atomic load -- which is all the model //! checker needs to see. //! +//! **Important coverage limit.** Model-checker tests that go through +//! `Slot` / `SlotOption` exercise the *protocol* of a single-slot +//! atomic publication: one writer swaps, readers see either old or +//! new, no torn read. They do *not* exercise `arc_swap`'s internal +//! hazard-pointer machinery, which is what the production path +//! actually runs. A bug inside `arc_swap` itself (e.g. a missed +//! retire, an incorrect epoch comparison) cannot surface under loom +//! or shuttle here. If you want coverage of `arc_swap`'s internals, +//! the miri job (which runs against the real `ArcSwap` in +//! 
permissive-provenance mode) is where it lives. +//! //! [`Subscriber::snapshot`]: crate::Subscriber::snapshot // Strict provenance checks fail with arc-swap since it uses hazard pointers and does not (yet) use the new diff --git a/concurrency/src/stress.rs b/concurrency/src/stress.rs new file mode 100644 index 0000000000..243b48ec2b --- /dev/null +++ b/concurrency/src/stress.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Backend dispatch for model-checking tests. +//! +//! [`stress`] runs `body` under whichever concurrency backend the crate +//! was compiled against: +//! +//! * default backend -- direct call, no scheduling exploration +//! * `loom` feature -- `loom::model` +//! * `shuttle` feature -- `shuttle::check_random` +//! * `shuttle_pct` feature -- `shuttle::check_pct` +//! * `shuttle_dfs` feature -- `shuttle::check_dfs` (capped at `ITERATIONS`) +//! +//! `lib.rs` `compile_error!`s if both `loom` and any `shuttle*` are +//! enabled at once, so only one branch should ever fire in a real +//! build. Under `--all-features` the `silence_clippy` escape hatch +//! suppresses that error and the `cfg_select!` below resolves the +//! arms in this order: `loom > shuttle_dfs > shuttle_pct > shuttle`. +//! Same precedence the routing in `concurrency::sync` uses. +//! +//! Tests written once exercise any of these by toggling features on the +//! crate. The `#[concurrency::test]` attribute (in `concurrency-macros`) +//! is a thin wrapper that calls this function for you. + +/// Run `body` under the currently selected concurrency backend. +/// +/// See the module docs for the per-backend dispatch table. 
+pub fn stress<F>(body: F) +where + F: Fn() + Send + Sync + 'static, +{ + // The feature lattice in `Cargo.toml` makes `feature = "shuttle"` + // true under any shuttle variant, so the const-cfgs here are + // correspondingly simple: ITERATIONS is needed by any shuttle arm, + // PCT_DEPTH is only consumed by the shuttle_pct arm. + #[cfg(all(not(feature = "loom"), feature = "shuttle"))] + const ITERATIONS: usize = 16; + #[cfg(all( + not(feature = "loom"), + not(feature = "shuttle_dfs"), + feature = "shuttle_pct" + ))] + const PCT_DEPTH: usize = 3; + cfg_select! { + feature = "loom" => { loom::model(body); }, + feature = "shuttle_dfs" => { shuttle::check_dfs(body, Some(ITERATIONS)); }, + feature = "shuttle_pct" => { shuttle::check_pct(body, ITERATIONS, PCT_DEPTH); }, + feature = "shuttle" => { shuttle::check_random(body, ITERATIONS); }, + not(any(feature = "loom", feature = "shuttle")) => { body(); }, + _ => compile_error!( + "stress: a model-checker feature is enabled but no dispatch \ + arm matched. Either an explicit arm above is missing, or \ + the `not(any(...))` default needs widening to cover the \ + new feature.", + ), + } +} diff --git a/concurrency/src/sync/mod.rs b/concurrency/src/sync/mod.rs index e70d96b4e9..2ea01b9a6f 100644 --- a/concurrency/src/sync/mod.rs +++ b/concurrency/src/sync/mod.rs @@ -13,24 +13,59 @@ //! `Weak`). //! * `shuttle` / `shuttle_pct` / `shuttle_dfs` features: poison-as-panic //! wrapper around `shuttle::sync`. All three flavours share one -//! wrapper module; the feature lattice means a single -//! `feature = "shuttle"` check is true under every variant. The -//! scheduler difference is runtime-only (see `concurrency::stress`, -//! added in a later PR). +//! wrapper module; the scheduler difference is runtime-only (see +//! `concurrency::stress`). //! * `parking_lot` feature (default): zero-cost re-export of //! `parking_lot`'s naked-guard locks; the production hot path. //! 
Skipped when `_strict_provenance` is on, even if `parking_lot` //! is also on, because `parking_lot_core::word_lock` uses //! integer-to-pointer casts that miri's strict-provenance mode //! rejects; the CI miri job exercises the fallback slot under -//! strict provenance, and that needs the sync surface to come from -//! `std::sync`. +//! strict provenance, and that needs the sync surface to come +//! from `std::sync`. //! * Otherwise: `std_backend` -- a thin poison-as-panic wrapper around -//! `std::sync`. Lets `--no-default-features` and `_strict_provenance` -//! builds compile without depending on `parking_lot`. +//! `std::sync`. Lets `--no-default-features` and +//! `_strict_provenance` builds compile without depending on +//! `parking_lot`. +//! +//! # Portability footguns the facade *does not* paper over +//! +//! The wrapped backends are observationally compatible with the +//! production `parking_lot` surface for the things call sites +//! actually use, but a few API details diverge in ways that matter +//! to anyone writing a static, a model-checked test, or code that +//! relies on `parking_lot`-specific schedules: +//! +//! * **`Mutex::new` / `RwLock::new` are not `const fn` under +//! `loom`/`shuttle*`.** loom's `Mutex::new` is plain `fn` because +//! each instance registers with the loom executor; shuttle's is +//! `const fn`, but the facade exposes the lowest common +//! denominator. So `static M: Mutex = Mutex::new(...)` compiles +//! under the default and `parking_lot` backends and fails to +//! typecheck under the model-checker backends. Workaround for +//! tests that need a static: wrap the static in `OnceLock`, or +//! construct the `Mutex` inside the test body. +//! +//! * **`OnceLock` under `loom`/`shuttle*` is re-exported from +//! `std::sync` unchanged.** It is sound for laziness, but it uses +//! uninstrumented atomics inside, so the model checker does *not* +//! explore the orderings around `OnceLock::get_or_init`. Tests +//! 
whose correctness depends on the publication ordering of a +//! once-initialised cell need to model that ordering explicitly +//! (e.g. an `Arc` + an explicit `Acquire` load on the +//! subscriber, both of which loom *does* model). +//! +//! * **`RwLock::upgradable_read` under `loom`/`shuttle*` takes an +//! exclusive write lock.** Sound -- no schedule that `parking_lot` +//! would allow is forbidden -- but lossy: the model checker never +//! explores the many-readers-plus-one-upgradable schedule that +//! `parking_lot` permits. Code whose correctness hinges on that +//! specific interleaving needs an explicit `read()` then `write()` +//! pair (which loom *can* model), or a richer state machine in +//! the facade. -// loom takes priority so the model checker can drive its own internal state -// (used for tests that opt loom in explicitly). +// loom takes priority so the model checker can drive its own internal +// state (used for tests that opt loom in explicitly). #[cfg(all(feature = "loom", not(feature = "silence_clippy")))] mod loom_backend; #[cfg(all(feature = "loom", not(feature = "silence_clippy")))] @@ -44,15 +79,15 @@ pub use shuttle_backend::*; #[cfg(all( not(feature = "loom"), not(feature = "shuttle"), - not(feature = "_strict_provenance"), feature = "parking_lot", + not(feature = "_strict_provenance"), ))] mod parking_lot_backend; #[cfg(all( not(feature = "loom"), not(feature = "shuttle"), - not(feature = "_strict_provenance"), feature = "parking_lot", + not(feature = "_strict_provenance"), ))] pub use parking_lot_backend::*; diff --git a/concurrency/src/sync/std_backend.rs b/concurrency/src/sync/std_backend.rs index 263cba6e07..4313a87f50 100644 --- a/concurrency/src/sync/std_backend.rs +++ b/concurrency/src/sync/std_backend.rs @@ -207,7 +207,7 @@ impl RwLock { /// Acquire an upgradable read guard. /// /// std `RwLock` has no native upgradable-read; this is implemented - /// as an exclusive `write()`. 
Subsequent backends (parking_lot) + /// as an exclusive `write()`. Subsequent backends (`parking_lot`) /// will replace this with a true upgradable read; meanwhile the /// surface is consistent across backends, sound in all cases, and /// merely loses the many-readers-plus-one-upgradable schedule that diff --git a/concurrency/src/thread/loom_scope.rs b/concurrency/src/thread/loom_scope.rs new file mode 100644 index 0000000000..369ac1d48b --- /dev/null +++ b/concurrency/src/thread/loom_scope.rs @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Loom-only `thread::scope` shim. +//! +//! Loom 0.7 does not ship `scope`. We provide one by storing every +//! spawned `JoinHandle` on the `Scope` itself and joining each handle +//! from the caller of `scope()` before returning. This mirrors what +//! `std::thread::scope` does internally (its `JoinInner::drop` joins the +//! OS thread before signaling the main thread): every spawned thread +//! is fully terminated -- including all its captured drops -- before +//! `scope()` returns, so any `'scope`-bounded borrow the thread held is +//! released *on the thread that joined*, never on the spawned thread +//! after `'scope` has ended. +//! +//! The shim's safety contract therefore matches std's: spawned closures +//! may borrow data of any lifetime that outlives the scope (`'env`). +//! Internally we lift the closure's `'scope` lifetime to `'static` with +//! a single `mem::transmute`, sound because of the join-before-return +//! guarantee. +//! +//! The keepalive trait object stored in `ScopeInner::pending` keeps its +//! honest `'scope` bound; the dropck-vs-HRTB tension that requires for +//! the closure transmute is resolved on this side by wrapping +//! `ScopeInner<'scope>` in `ManuallyDrop` so that `Scope`'s implicit +//! drop never destructs `'scope`-bearing data, and then explicitly +//! `ManuallyDrop::drop`ping the inner at the end of `scope()` while +//! 
`'scope` is still live. See the SAFETY comments at the manual-drop +//! site and at the closure transmute for details. +//! +//! Loom's `thread::spawn` is stricter than std's `spawn_unchecked` -- +//! it requires `T: 'static` for the return type as well as the closure. +//! To accommodate, the spawned closure here is wrapped to return `()` +//! and write the user-visible `T` into an `Arc<Mutex<Option<T>>>` that +//! `ScopedJoinHandle::join` reads back. The wrapper itself returns `()` +//! so loom's `'static` bound is trivially satisfied. +//! +//! ## Why the `result_slot` is held in three places, and how drop +//! affinity is enforced +//! +//! Each call to [`Scope::spawn`] produces three references to the same +//! `Arc<Mutex<Option<T>>>`: +//! +//! 1. **The spawned thread's wrapper closure** writes the user's `T` +//! into the slot when the user closure returns. +//! 2. **The user's [`ScopedJoinHandle`]** lets `.join()` take `T` out; +//! if the handle is dropped without joining, its clone simply +//! decrements the strong count. +//! 3. **The `Scope`'s slot keepalive** is a type-erased third clone +//! held in `ScopeInner::pending`. The auto-join loop in `scope()` +//! walks every pending entry, joins its `JoinHandle`, then calls +//! `ResultKeepalive::take_payload` on the keepalive to extract the +//! `T` and drop it **on the main thread**. The last `Arc` clone +//! (which might be on the spawned thread, if loom's notify fired +//! before the closure's capture-drop completed) then frees an empty +//! `Mutex<Option<T>>` shell with nothing left to destruct. +//! +//! Earlier revisions tried to enforce drop affinity by asserting at +//! teardown that `strong_count == 1`. That works for +//! `std::thread::scope`, which synchronously waits for the spawned thread's full +//! termination (including capture drops), but not for loom: loom's +//! `JoinHandle::join` is satisfied by the spawned thread's `notify`, +//! which is sequenced *after* `f()` returns but *before* the runtime +//! 
has finished dropping the box that owned the closure's captures.
+//! A schedule where main reaches the assertion in that window
+//! observes `strong_count == 2`, so we drop the assertion and run the
+//! extract-and-drop on main thread explicitly.
+
+// The shim has two unsafe operations: (1) a `mem::transmute` that
+// lifts the spawned closure's `'scope` lifetime to `'static`, since
+// loom 0.7 has no `spawn_unchecked`; (2) an explicit
+// `ManuallyDrop::drop` of the inner `ScopeInner<'scope>` at the end
+// of `scope()`, which lets the keepalive trait objects keep their
+// honest `'scope` bound while dropck does not see them at `scope`'s
+// auto-drop. Both are sound because of the join-before-return
+// contract; see the per-site SAFETY comments. The crate root denies
+// `unsafe_code`, so allow it locally.
+#![allow(unsafe_code)]
+// The shim panics on internal invariant violations -- same as std's
+// `thread::scope`. The crate root denies `clippy::panic`/`expect_used`;
+// allow them locally.
+#![allow(clippy::panic, clippy::expect_used)]
+// `Scope::scope` field is a PhantomData invariance marker matching std's
+// internal layout; the name aligns with the lifetime parameter, not a
+// stylistic choice.
+#![allow(clippy::struct_field_names)]
+
+use core::marker::PhantomData;
+use core::panic::AssertUnwindSafe;
+use loom::sync::Arc;
+use loom::thread::{self, JoinHandle};
+use std::panic::{catch_unwind, resume_unwind};
+
+use crate::sync::Mutex;
+
+/// Shared slot for a `JoinHandle<()>` that may be claimed either by
+/// the user via [`ScopedJoinHandle::join`] or by [`scope`]'s
+/// auto-join loop -- whichever runs first takes it out. Both sides
+/// hold an `Arc` clone of the same `Mutex<Option<JoinHandle<()>>>`.
+type SharedJoinSlot = Arc<Mutex<Option<JoinHandle<()>>>>;
+
+/// A scope for spawning threads that may borrow non-`'static` data.
+///
+/// Created by [`scope`]. Mirrors `std::thread::Scope`.
+pub struct Scope<'scope, 'env: 'scope> {
+    inner: Mutex<core::mem::ManuallyDrop<ScopeInner<'scope>>>,
+    /// Invariance over `'scope` (matches std). Without it, `'scope`
+    /// could shrink and the unsafe lifetime launder would be unsound.
+    scope: PhantomData<&'scope mut &'scope ()>,
+    env: PhantomData<&'env mut &'env ()>,
+}
+
+/// Trait-object behind which each spawn's `Arc<Mutex<Option<T>>>`
+/// keepalive lives. Exists so the `scope()` teardown loop can call
+/// `take_payload()` to extract the `T` and drop it on the main thread,
+/// regardless of how many `Arc` clones remain on other threads.
+trait ResultKeepalive: Send {
+    /// Empty the slot via `Option::take()`, dropping any contained `T`
+    /// on the caller's thread (main, in `scope()`'s teardown loop).
+    ///
+    /// Drop-affinity is enforced by this take, not by `Arc` count: any
+    /// remaining `Arc<Mutex<Option<T>>>` clones (e.g. a slow-dropping
+    /// `result_for_thread` whose owning thread has notified-but-not-
+    /// fully-exited) will then see an `Option::None` and run no
+    /// `T::Drop` of their own. The last `Arc` to drop frees the empty
+    /// `Mutex<Option<T>>` shell, which has no `T` to destruct.
+    fn take_payload(&self);
+}
+
+impl<T: Send> ResultKeepalive for Arc<Mutex<Option<T>>> {
+    fn take_payload(&self) {
+        let _ = self.lock().take();
+    }
+}
+
+struct ScopeInner<'scope> {
+    /// Pairs of `(shared_handle_slot, slot_keepalive)`. The keepalive
+    /// is the third clone of each spawn's `Arc<Mutex<Option<T>>>`,
+    /// behind a small `ResultKeepalive + 'scope` trait object. The
+    /// `'scope` bound is honest: the inner `Arc<Mutex<Option<T>>>`
+    /// holds a `T: 'scope`, and the Vec keeps the trait object alive
+    /// until `scope()`'s teardown drops it (which happens before
+    /// `'scope` ends).
+    pending: Vec<(SharedJoinSlot, Box<dyn ResultKeepalive + 'scope>)>,
+}
+
+/// An owned handle to a thread spawned via [`Scope::spawn`].
+///
+/// Dropping the handle does **not** detach the thread -- the auto-join
+/// in [`scope`] still waits for it. To collect the thread's result or
+/// panic, call [`ScopedJoinHandle::join`] before [`scope`] returns.
+pub struct ScopedJoinHandle<'scope, T> {
+    /// Shared with `Scope::inner.pending`. Whoever calls
+    /// `lock().take()` first claims the handle: `ScopedJoinHandle::join`
+    /// in the user path, the teardown loop in `scope` otherwise.
+    handle_slot: SharedJoinSlot,
+    result: Arc<Mutex<Option<T>>>,
+    _scope: PhantomData<&'scope ()>,
+}
+
+impl<T> ScopedJoinHandle<'_, T> {
+    /// Wait for the spawned thread to finish and return its result.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err` with the panic payload if the spawned thread
+    /// panicked. The surrounding [`scope`] will not double-panic in
+    /// that case: an explicitly joined handle absorbs the panic.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the handle slot or result slot is empty. NOTE(review):
+    /// [`scope`]'s teardown loop also takes from both slots, so a `join`
+    /// issued from a sibling thread after the `scope` body has returned
+    /// looks able to hit these panics -- confirm before relying on it.
+    pub fn join(self) -> std::thread::Result<T> {
+        let handle = self
+            .handle_slot
+            .lock()
+            .take()
+            .expect("scoped thread handle was already taken (double join?)");
+        handle.join()?;
+        Ok(self
+            .result
+            .lock()
+            .take()
+            .expect("scoped thread did not deposit its result"))
+    }
+}
+
+/// Spawn scoped threads, joining all of them before returning.
+///
+/// See `std::thread::scope` for the full API contract. The shim matches
+/// that contract under loom.
+///
+/// # Panics
+///
+/// Propagates any panic from `f` after all spawned threads have been
+/// joined. If `f` itself didn't panic but any spawned thread did and
+/// the panic was never absorbed by an explicit `.join()`, resumes the
+/// first such panic with its original payload.
+pub fn scope<'env, F, T>(f: F) -> T
+where
+    F: for<'scope> FnOnce(&'scope Scope<'scope, 'env>) -> T,
+{
+    let scope = Scope {
+        inner: Mutex::new(core::mem::ManuallyDrop::new(ScopeInner {
+            pending: Vec::new(),
+        })),
+        scope: PhantomData,
+        env: PhantomData,
+    };
+
+    // Run `f` inside `catch_unwind` so we can still wait for spawned
+    // threads even if `f` panicked.
+    let result = catch_unwind(AssertUnwindSafe(|| f(&scope)));
+
+    // Drain pending entries. For each, try to claim the handle from
+    // the shared slot; if it's `None`, the user already joined. Then
+    // take any payload still in the result slot and drop the
+    // keepalive, so that a leftover `T`'s destructor runs on this
+    // -- the main -- thread.
+    //
+    // If a spawned thread panicked, capture the first panic payload so
+    // we can `resume_unwind` it at the end. `std::thread::scope` instead
+    // panics with a generic "a scoped thread panicked" message; resuming
+    // the payload keeps the thread's original assertion text. We still
+    // join every handle so subsequent panics' associated keepalives get
+    // dropped on the main thread before we propagate.
+    //
+    // The drain is a loop, not a single take: a scoped thread can
+    // itself call `s.spawn(...)` and push a new pending entry while
+    // we're joining earlier ones. Taking `pending` once would leave
+    // those nested handles unjoined and violate the
+    // join-before-return contract the `'scope` -> `'static` lifetime
+    // transmute relies on. Loop until the queue stays empty.
+    let mut first_spawn_panic: Option<Box<dyn core::any::Any + Send>> = None;
+    loop {
+        let pending = core::mem::take(&mut scope.inner.lock().pending);
+        if pending.is_empty() {
+            break;
+        }
+        for (handle_slot, keepalive) in pending {
+            if let Some(handle) = handle_slot.lock().take()
+                && let Err(payload) = handle.join()
+                && first_spawn_panic.is_none()
+            {
+                first_spawn_panic = Some(payload);
+            }
+            // Drop-affinity: explicitly take the `Option<T>` payload out
+            // of the slot on this (the main) thread. `T::Drop` runs
+            // here, regardless of how many `Arc` clones of the slot
+            // still exist on other threads. The last `Arc` to drop
+            // then frees the empty shell, which contains no `T` to
+            // destruct.
+            //
+            // We can't instead assert that the `Arc` count is exactly 1
+            // here: per the module docs, loom's `JoinHandle::join` only
+            // synchronises on the spawned thread's notify, which can
+            // fire before that thread has released its clone.
+            //
+            // NOTE(review): this take is unconditional, so it also runs
+            // when an explicit `ScopedJoinHandle::join` on another
+            // thread claimed the handle but has not read its result
+            // yet; that `join` would then panic. Confirm intended.
+            keepalive.take_payload();
+            drop(keepalive);
+        }
+    }
+
+    // SAFETY: `scope.inner` wraps `ScopeInner<'scope>` in
+    // `ManuallyDrop` so that the auto-drop of `scope` (a local
+    // bound by the function block's lifetime) does not destruct
+    // `'scope`-bearing data -- that would force `'scope` to
+    // outlive `scope`, but `'scope` is fixed by the HRTB-chosen
+    // borrow at `f(&scope)` and is necessarily shorter than
+    // `scope`'s local lifetime. We are still inside `scope()`'s
+    // body here, so `'scope` is alive and the explicit
+    // `ManuallyDrop::drop` is the correct place to release the
+    // (now-emptied) `Vec` allocation. The inner is never accessed
+    // again after this point: the function only matches `result`
+    // and returns. Loom 0.7's leak check at the end of each
+    // `loom::model` iteration would otherwise flag the leaked
+    // allocation.
+    unsafe {
+        core::mem::ManuallyDrop::drop(&mut scope.inner.lock());
+    }
+
+    match result {
+        // The `f` body itself panicked. Its panic dominates (it's the
+        // outermost frame), so propagate it. A spawned panic captured
+        // in `first_spawn_panic` is silently dropped on this path,
+        // matching std's behaviour.
+        Err(e) => resume_unwind(e),
+        // No body panic, but at least one spawned thread did. Resume the
+        // first spawned panic with its original payload -- preserves
+        // assertion messages and any other diagnostic carried in the
+        // payload. (std detects this case too, but panics with a generic
+        // message rather than the payload.)
+        Ok(_) if first_spawn_panic.is_some() => {
+            resume_unwind(first_spawn_panic.expect("just checked"))
+        }
+        Ok(r) => r,
+    }
+}
+
+impl<'scope> Scope<'scope, '_> {
+    /// Spawn a thread within the scope.
+    ///
+    /// The closure may borrow data of any lifetime that outlives the
+    /// scope (i.e. `'env`). The scope guarantees the thread is joined
+    /// before [`scope`] returns, so those borrows remain valid for the
+    /// duration of the thread.
+    pub fn spawn<F, T>(&'scope self, f: F) -> ScopedJoinHandle<'scope, T>
+    where
+        F: FnOnce() -> T + Send + 'scope,
+        T: Send + 'scope,
+    {
+        let result_slot: Arc<Mutex<Option<T>>> = Arc::new(Mutex::new(None));
+        let result_for_thread = Arc::clone(&result_slot);
+        // Third clone, kept alive by the Scope itself until after the
+        // thread is joined. See module docs ("Why the result_slot is
+        // held in three places").
+        let result_keepalive = Arc::clone(&result_slot);
+
+        let wrapped = move || {
+            // Mirror std: catch the panic and resume so loom sees the
+            // thread terminate with a panic. The scope's `pending`
+            // loop will record the panic via `JoinHandle::join()`.
+            //
+            // `result_for_thread` (the spawned-thread Arc clone of the
+            // result slot) is dropped implicitly when the closure body
+            // exits. We do not rely on that drop happening before
+            // `scope()`'s teardown runs `T::Drop`; loom's
+            // `JoinHandle::join` synchronises only on `notify`, which can
+            // fire before the closure's captures have fully been released.
+            // `scope()`'s teardown calls `take_payload` to drop the `T`
+            // on the main thread regardless of the Arc count, which is
+            // what the keepalive trait's contract guarantees.
+            match catch_unwind(AssertUnwindSafe(f)) {
+                Ok(v) => {
+                    *result_for_thread.lock() = Some(v);
+                }
+                Err(e) => resume_unwind(e),
+            }
+        };
+
+        // SAFETY: `loom::thread::spawn` requires `F: 'static` (no
+        // `spawn_unchecked` is available in loom 0.7), so we have to
+        // lifetime-launder the closure box from `'scope` to `'static`.
+        // Soundness rests on the join-before-return contract: every
+        // spawned thread is joined -- by an explicit
+        // `ScopedJoinHandle::join` or by `scope()`'s teardown loop --
+        // before `scope()` returns. By then the user closure `f` has
+        // been consumed by the call inside `wrapped`, so no borrow it
+        // captured is used afterwards. The other `'scope`-bound capture
+        // is `result_for_thread`; the teardown loop (or the explicit
+        // `join`) takes any deposited `T` out of the shared slot, so
+        // even if that `Arc` clone is released late it only frees an
+        // empty `Option`. The user-visible `ScopedJoinHandle.result`
+        // cannot outlive `scope()` either: the handle's `'scope` bound
+        // forces it to be dropped first.
+        // NOTE(review): whether loom's `JoinHandle::join` can return
+        // before the wrapper's captures are released is asserted by
+        // other comments in this file; not verified against loom here.
+        let wrapped: Box<dyn FnOnce() + Send + 'static> = unsafe {
+            core::mem::transmute::<
+                Box<dyn FnOnce() + Send + 'scope>,
+                Box<dyn FnOnce() + Send + 'static>,
+            >(Box::new(wrapped))
+        };
+
+        let join_handle = thread::spawn(wrapped);
+
+        // Shared handle slot: `scope()` and the user's
+        // `ScopedJoinHandle` both hold an `Arc` clone of the same
+        // `Mutex<Option<JoinHandle<()>>>`. Whoever calls
+        // `lock().take()` first claims the join.
+        let handle_slot: SharedJoinSlot = Arc::new(Mutex::new(Some(join_handle)));
+        let handle_for_scope = Arc::clone(&handle_slot);
+
+        // No lifetime launder needed: `ScopeInner<'scope>` carries the
+        // `'scope` parameter on the `Box<dyn ResultKeepalive + 'scope>`
+        // it stores, so the trait object's lifetime is honest. The
+        // for-all-`'scope` HRTB on `scope()`'s `F` resolves because
+        // `Scope<'scope, 'env>` is already parameterised by `'scope`
+        // and `'scope`'s invariance (the `PhantomData<&'scope mut
+        // &'scope ()>`) keeps the chosen `'scope` from shrinking.
+        let keepalive: Box<dyn ResultKeepalive + 'scope> = Box::new(result_keepalive);
+        self.inner
+            .lock()
+            .pending
+            .push((handle_for_scope, keepalive));
+
+        ScopedJoinHandle {
+            handle_slot,
+            result: result_slot,
+            _scope: PhantomData,
+        }
+    }
+}
diff --git a/concurrency/src/thread/mod.rs b/concurrency/src/thread/mod.rs
new file mode 100644
index 0000000000..d468568de9
--- /dev/null
+++ b/concurrency/src/thread/mod.rs
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Backend-routed threading primitives.
+//!
+//! Re-exports the active backend's `thread` module wholesale (`spawn`,
+//! `current`, `sleep`, `yield_now`, `JoinHandle`, `Thread`, `ThreadId`,
+//! `Builder`, ...) so call sites use one path regardless of whether
+//! they're building against `std`, `loom`, or `shuttle`.
+//!
+//! ## `thread::scope`
+//!
+//! `std::thread::scope` (stable since 1.63) and `shuttle::thread::scope`
+//! are re-exported directly. `loom` 0.7 does not provide `scope`, so we
+//! ship a local shim in [`loom_scope`] that matches the std API on top
+//! of loom's `spawn` + `JoinHandle::join` + `Arc`/`Mutex` slots, with a
+//! narrow `unsafe` lifetime launder (same trick std uses internally).
+//!
+//! Tests written in terms of `concurrency::thread::scope` work
+//! identically across every backend; no `Box::into_raw`/`'static`
+//! workarounds at call sites.
+ +#[cfg(not(any(feature = "loom", feature = "shuttle")))] +pub use std::thread::*; + +#[cfg(all( + feature = "shuttle", + not(feature = "loom"), + not(feature = "silence_clippy") +))] +pub use shuttle::thread::*; + +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +pub use loom::thread::*; + +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +mod loom_scope; + +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +pub use loom_scope::{Scope, ScopedJoinHandle, scope}; + +// Match the silence_clippy escape hatch in `crate::sync`: under +// `--all-features` both loom and shuttle are enabled at once, which +// can't pick a single backend. Route to `std::thread` so the binary +// type-checks; it is never executed in that configuration. +#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] +pub use std::thread::*; diff --git a/concurrency/tests/arc_weak.rs b/concurrency/tests/arc_weak.rs new file mode 100644 index 0000000000..6fd38fc3ab --- /dev/null +++ b/concurrency/tests/arc_weak.rs @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Direct coverage for the `concurrency::sync::Arc` wrapper and +//! `Weak` shim. +//! +//! Loom 0.7 does not ship `Weak` and does not give `loom::sync::Arc` +//! an associated `downgrade` function. The crate adds both as a thin +//! wrapper around `loom::sync::Arc` (see `concurrency/src/sync/test_facade.rs`). +//! Because the shim is custom code -- not a re-export -- it needs its +//! own test coverage; otherwise the only thing exercising it is +//! `quiescent_model.rs`, which uses it as a building block and would +//! surface failures as misbehaving QSBR tests rather than as +//! localised shim bugs. +//! +//! Run under loom with: +//! +//! ```sh +//! cargo test --release -p dataplane-concurrency --features loom --test arc_weak +//! ``` +//! +//! The tests also pass on the default and shuttle backends -- the +//! 
contract is the same; only the *internals* of `Arc`/`Weak` differ.
+//! Documented quirks of the loom shim (currently: `Weak::upgrade`
+//! succeeds even after the last `Arc` drop) are pinned by tests
+//! gated on `feature = "loom"` to avoid asserting on real
+//! `std::sync` / `shuttle::sync` semantics.
+//!
+//! `shuttle_pct` is opted out at file level: PCT is for biasing toward
+//! rare interleavings of concurrent code, but most of the tests in this
+//! file are protocol-level checks on `Arc` / `Weak` and either run on a
+//! single thread or only briefly spawn a helper. PCT panics on bodies
+//! that do not exercise sustained concurrency on the main thread, and
+//! the contract being tested here is identical to what the plain
+//! `shuttle` (random) variant already covers.
+
+#![cfg(not(feature = "shuttle_pct"))]
+
+// `#[concurrency::test]` is provided by `dataplane-concurrency`; alias
+// the crate so the macro path resolves inside this integration test.
+extern crate dataplane_concurrency as concurrency;
+
+use dataplane_concurrency::sync::Arc;
+use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering};
+use dataplane_concurrency::sync::{Mutex, Weak};
+use dataplane_concurrency::thread;
+
+#[concurrency::test]
+fn arc_new_strong_count_is_one() {
+    let a = Arc::new(42u32);
+    assert_eq!(Arc::strong_count(&a), 1);
+}
+
+#[concurrency::test]
+fn arc_clone_then_drop_round_trips_strong_count() {
+    let a = Arc::new(42u32);
+    let b = a.clone();
+    assert!(Arc::strong_count(&a) >= 2);
+    drop(b);
+    // After `b` drops, `a` is the only remaining strong (modulo any
+    // `Weak`-quirk count contributions, none here).
+    assert_eq!(Arc::strong_count(&a), 1);
+}
+
+#[concurrency::test]
+fn arc_ptr_eq_same_allocation_is_true() {
+    let a = Arc::new(42u32);
+    let b = a.clone();
+    assert!(Arc::ptr_eq(&a, &b));
+}
+
+#[concurrency::test]
+fn arc_ptr_eq_different_allocations_is_false() {
+    let a = Arc::new(42u32);
+    let b = Arc::new(42u32);
+    assert!(!Arc::ptr_eq(&a, &b));
+}
+
+#[concurrency::test]
+fn weak_new_upgrades_to_none() {
+    let w: Weak<u32> = Weak::new();
+    assert!(w.upgrade().is_none());
+}
+
+#[concurrency::test]
+fn arc_downgrade_then_upgrade_returns_value() {
+    let a = Arc::new(42u32);
+    let w = Arc::downgrade(&a);
+    let upgraded = w.upgrade().expect("upgrade of fresh weak should succeed");
+    assert_eq!(*upgraded, 42);
+}
+
+#[concurrency::test]
+fn arc_new_uninit_then_assume_init_round_trip() {
+    let mut uninit: Arc<core::mem::MaybeUninit<u32>> = Arc::new_uninit();
+    let slot = Arc::get_mut(&mut uninit).expect("sole strong reference");
+    slot.write(42);
+    // SAFETY: just initialised via `write`.
+    #[allow(unsafe_code)]
+    let init = unsafe { uninit.assume_init() };
+    assert_eq!(*init, 42);
+}
+
+#[concurrency::test]
+fn weak_into_raw_from_raw_round_trips() {
+    let a = Arc::new(42u32);
+    let w = Arc::downgrade(&a);
+    let raw = w.into_raw();
+    // SAFETY: `a` is still alive, so `raw` points at a live allocation.
+    #[allow(unsafe_code)]
+    let value = unsafe { *raw };
+    assert_eq!(value, 42);
+    // SAFETY: `raw` came from `Weak::into_raw`, never used elsewhere.
+    #[allow(unsafe_code)]
+    let recovered = unsafe { Weak::from_raw(raw) };
+    let upgraded = recovered.upgrade().expect("upgrade after round-trip");
+    assert_eq!(*upgraded, 42);
+}
+
+#[concurrency::test]
+fn arc_display_forwards_to_inner() {
+    let a = Arc::new(42u32);
+    assert_eq!(format!("{a}"), "42");
+}
+
+#[concurrency::test]
+fn arc_pointer_format_yields_address() {
+    let a = Arc::new(42u32);
+    // The exact representation is `0x...` on every platform we
+    // target; just check the format is non-empty and starts with `0x`.
+    let p = format!("{a:p}");
+    assert!(p.starts_with("0x"), "pointer format unexpected: {p}");
+}
+
+// ---------- documented-quirk tests (loom-only) ----------
+
+/// Under the loom shim, `Weak` holds a strong clone of the inner
+/// `loom::sync::Arc`, so `upgrade` succeeds even after every original
+/// `Arc` has dropped. This is the documented limitation explained in
+/// the module-level docs of `concurrency/src/sync/loom_backend.rs`;
+/// the test pins the behaviour so a future "real `Weak`"
+/// implementation fails this test loudly rather than silently
+/// changing semantics.
+#[cfg(feature = "loom")]
+#[concurrency::test]
+fn loom_quirk_weak_keeps_strong_alive() {
+    let a = Arc::new(42u32);
+    let w = Arc::downgrade(&a);
+    drop(a);
+    // Under real `std::sync::Weak` semantics, this would be `None`.
+    // Under the loom shim, the `Weak` itself holds a strong clone.
+    let upgraded = w.upgrade().expect("loom shim quirk: Weak keeps strong");
+    assert_eq!(*upgraded, 42);
+}
+
+// ---------- multi-thread (loom multiplies via scheduling) ----------
+
+/// Two threads each clone, read, and drop independent `Arc` clones.
+/// Loom explores all interleavings of the strong-count operations.
+#[concurrency::test]
+fn two_threads_clone_and_drop_independently() {
+    let a = Arc::new(42u32);
+    let a1 = a.clone();
+    let a2 = a.clone();
+    let h1 = thread::spawn(move || {
+        assert_eq!(*a1, 42);
+    });
+    let h2 = thread::spawn(move || {
+        assert_eq!(*a2, 42);
+    });
+    h1.join().unwrap();
+    h2.join().unwrap();
+    // After the spawned threads have joined and dropped their
+    // clones, only `a` remains.
+    assert_eq!(Arc::strong_count(&a), 1);
+}
+
+/// A `Weak` registered in a `Mutex`-protected slot survives concurrent
+/// reader access. This is a tiny analogue of the QSBR usage pattern in
+/// `nat::stateful::apalloc`: a `Weak` slot upgraded by a reader
+/// thread while another thread holds an `Arc` to the value.
+#[concurrency::test]
+fn mutex_protected_weak_slot_upgrade() {
+    let a = Arc::new(99u32);
+    let slot: Arc<Mutex<Option<Weak<u32>>>> = Arc::new(Mutex::new(Some(Arc::downgrade(&a))));
+    let slot_for_thread = Arc::clone(&slot);
+    let read = Arc::new(AtomicUsize::new(0));
+    let read_for_thread = Arc::clone(&read);
+    let h = thread::spawn(move || {
+        let guard = slot_for_thread.lock();
+        if let Some(w) = guard.as_ref()
+            && let Some(inner) = w.upgrade()
+        {
+            read_for_thread.store(*inner as usize, Ordering::SeqCst);
+        }
+    });
+    h.join().unwrap();
+    assert_eq!(read.load(Ordering::SeqCst), 99);
+    drop(a);
+}
diff --git a/concurrency/tests/quiescent_loom.rs b/concurrency/tests/quiescent_loom.rs
deleted file mode 100644
index ac8fe5038e..0000000000
--- a/concurrency/tests/quiescent_loom.rs
+++ /dev/null
@@ -1,245 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// Copyright Open Network Fabric Authors
-
-//! Loom model-checking tests for `dataplane_quiescent`.
-//!
-//! These tests run only under `--features loom`. Standard protocol
-//! tests live in `tests/protocol.rs`; bolero properties in
-//! `tests/properties.rs`; bolero x shuttle in `tests/shuttle.rs`.
-//!
-//! Run with:
-//!
-//! ```sh
-//! cargo test --release -p dataplane-quiescent --features loom --test loom
-//! ```
-//!
-//! ## Why the `unsafe`
-//!
-//! Loom 0.7.2 doesn't expose `thread::scope`, only `thread::spawn`,
-//! which requires `'static`. But the new lifetime-bounded API gives
-//! us a `Subscriber<'p, T>` that borrows from the `Publisher` -- there
-//! is no `'static` to satisfy `thread::spawn` with.
-//!
-//! Workaround: each `loom::model` iteration boxes a fresh `Publisher`,
-//! lifts it to `&'static` via `Box::into_raw` for the body of the
-//! iteration, and recovers the `Box` at the end (so loom's Arc-leak
-//! audit is satisfied). The unsafe is local, narrow, and well-paired:
-//! every `into_raw` has a matching `from_raw`.
-//!
-//! `Box::leak` on its own would not work -- loom audits `Arc` cleanup
-//!
at the end of every model iteration and panics on leaked clones. -//! -//! ## Sizing -//! -//! Loom explores all legal interleavings of the operations inside each -//! `loom::model(|| { ... })` block. Keep test bodies minimal -- each -//! extra atomic op multiplies the search space. Two threads with one -//! atomic op each is roughly the right shape; "2 publishes + 2 -//! subscribers + a drop" already explodes. - -#![cfg(feature = "loom")] - -use loom::thread; - -use dataplane_concurrency::quiescent::{Publisher, channel}; - -/// Run `body` with a `&'static` reference to a freshly-constructed -/// `Publisher`. After `body` returns, recover the `Box` and drop the -/// `Publisher` so loom's Arc-leak audit is satisfied. -/// -/// The `'static` lifetime is real for the duration of `body` (the -/// `Publisher` is live in heap-allocated memory until `Box::from_raw` -/// runs after `body`). Caller must not retain any references derived -/// from the `&'static Publisher` past the return of `body`. -fn with_static_publisher(body: F) -where - F: FnOnce(&'static Publisher), -{ - let raw: *mut Publisher = Box::into_raw(Box::new(channel(0u32))); - // SAFETY: `raw` was just produced by `Box::into_raw` and is not - // freed until the matching `Box::from_raw` below. No aliasing - // occurs: `body` consumes the only handle. - let publisher: &'static Publisher = unsafe { &*raw }; - body(publisher); - // SAFETY: `body` has returned and the contract requires no - // outstanding references to `publisher`. `raw` is still the - // unique pointer to the heap allocation. - drop(unsafe { Box::from_raw(raw) }); -} - -/// A snapshot taken after a publish must observe a value the Publisher -/// ever stored. Under any interleaving of `publish` vs `snapshot`, the -/// Subscriber sees either the initial or the published value, never -/// anything else (no torn reads, no use-after-free). 
-#[test] -fn snapshot_observes_a_legal_value() { - loom::model(|| { - with_static_publisher(|publisher| { - let factory = publisher.factory(); - - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - let observed = *sub.snapshot(); - assert!( - observed == 0 || observed == 1, - "Subscriber observed illegal value {observed}", - ); - }); - - publisher.publish(1u32); - sub_handle.join().unwrap(); - }); - }); -} - -/// A Subscriber that takes a snapshot before the Publisher publishes, -/// then is dropped concurrently with the Publisher's reclaim, must not -/// deadlock and must not leave the protocol in an inconsistent state. -#[test] -fn subscriber_drop_during_publish_is_safe() { - loom::model(|| { - with_static_publisher(|publisher| { - let factory = publisher.factory(); - - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - let _ = *sub.snapshot(); - // Subscriber drops at end of thread; concurrent with publisher below. - }); - - publisher.publish(1u32); - publisher.reclaim(); - sub_handle.join().unwrap(); - }); - }); -} - -/// A Subscriber that snapshots after `publish` returns must observe the -/// published value, not the initial. This pins down the -/// publish-then-snapshot ordering. -#[test] -fn snapshot_after_publish_observes_published() { - loom::model(|| { - with_static_publisher(|publisher| { - let mut sub = publisher.factory().subscriber(); - publisher.publish(1u32); - let observed = *sub.snapshot(); - assert_eq!( - observed, 1, - "snapshot taken after publish() returns must observe the published value", - ); - }); - }); -} - -/// Subscriber registered before publish, snapshot taken after -- should -/// observe the published value. The 0-sentinel branch in -/// `min_observed` must not turn this into a use-after-free. 
-#[test] -fn registered_then_publish_then_snapshot() { - loom::model(|| { - with_static_publisher(|publisher| { - let factory = publisher.factory(); - - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - // Snapshot may race with publish. Either way, we must see - // a legal value. - let observed = *sub.snapshot(); - assert!(observed == 0 || observed == 1); - }); - - publisher.publish(1u32); - publisher.reclaim(); - sub_handle.join().unwrap(); - }); - }); -} - -// ===================================================================== -// Drop affinity: every `Versioned` destructor must run on the -// Publisher's thread. This is the headline guarantee of the crate; -// the existing tests above check legality and absence of deadlocks but -// do not verify the drop-thread invariant under all interleavings. -// ===================================================================== - -/// Payload whose `Drop` records the thread on which it ran. We use -/// `std::sync::Mutex` for the recording slot because loom doesn't need -/// to model contention on it (only one drop per `Versioned`, and we -/// only care about the thread id, not the order of records). -struct DropMarker { - drops: std::sync::Arc>>, -} - -impl Drop for DropMarker { - fn drop(&mut self) { - self.drops - .lock() - .expect("recording mutex poisoned") - .push(loom::thread::current().id()); - } -} - -/// Verifies the drop-affinity invariant under all loom interleavings. -/// -/// Setup: Publisher publishes a fresh marker (the initial goes into -/// `retired`) while a Subscriber thread snapshots and then drops. Any -/// interleaving of those two threads must result in **all** -/// `Versioned` destructors running on the Publisher's thread. 
In -/// particular: the race where Subscriber's `cached = None` decrement -/// of `Versioned`'s strong count and Publisher's `retired.clear()` -/// decrement of the same atomic could (on weak memory) reorder, is -/// enforced by the Acquire fence in `min_observed` after the -/// `Arc::strong_count == 1` check. -#[test] -fn destructor_of_initial_runs_on_publisher_thread() { - loom::model(|| { - let drops: std::sync::Arc>> = - std::sync::Arc::new(std::sync::Mutex::new(Vec::new())); - - let initial = DropMarker { - drops: std::sync::Arc::clone(&drops), - }; - let raw: *mut Publisher = Box::into_raw(Box::new(channel(initial))); - // SAFETY: `raw` is the unique pointer to the heap allocation; - // the matching `Box::from_raw` runs after all spawned work has - // joined and no references derived from `publisher` survive. - let publisher: &'static Publisher = unsafe { &*raw }; - - let publisher_thread = loom::thread::current().id(); - - // Subscriber thread: snapshot then drop. Race against the - // publisher's publish/reclaim below. - let factory = publisher.factory(); - let drops_for_pub = std::sync::Arc::clone(&drops); - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - let _ = sub.snapshot(); - // sub drops at end of thread; concurrent with publisher. - }); - - // Publisher publishes a new marker (initial goes into retired). - publisher.publish(DropMarker { - drops: drops_for_pub, - }); - - sub_handle.join().unwrap(); - // Force a final reclaim pass so retired drains deterministically. - publisher.reclaim(); - - // SAFETY: subscriber thread has joined; no references derived - // from `publisher` are still in use. - drop(unsafe { Box::from_raw(raw) }); - - // Every recorded drop must have happened on the publisher - // (main) thread. 
- let recorded = drops.lock().expect("recording mutex poisoned"); - for (i, t) in recorded.iter().enumerate() { - assert_eq!( - *t, publisher_thread, - "DropMarker {i} ran its destructor on {t:?}, \ - not the publisher thread {publisher_thread:?}", - ); - } - }); -} diff --git a/concurrency/tests/quiescent_model.rs b/concurrency/tests/quiescent_model.rs new file mode 100644 index 0000000000..2ad62e556d --- /dev/null +++ b/concurrency/tests/quiescent_model.rs @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Model-checking tests for `dataplane_concurrency::quiescent`. +//! +//! Each test is marked `#[concurrency::test]`, which routes the body to +//! whichever backend is active: +//! +//! * default -- runs the body once directly (smoke test) +//! * `loom` -- exhaustive interleaving exploration via `loom::model` +//! * `shuttle` / `shuttle_pct` / `shuttle_dfs` -- randomized / PCT / +//! DFS schedule exploration +//! +//! Run under loom (the headline use case) with: +//! +//! ```sh +//! cargo test --release -p dataplane-concurrency --features loom --test quiescent_model +//! ``` +//! +//! Standard protocol tests (real OS threads + `thread::scope` + sleeps) +//! live in `tests/quiescent_protocol.rs`; bolero property tests in +//! `tests/quiescent_properties.rs`; bolero x shuttle in +//! `tests/quiescent_shuttle.rs`. +//! +//! ## Sizing +//! +//! Loom explores all legal interleavings of the operations inside each +//! invocation. Keep test bodies minimal -- each extra atomic op +//! multiplies the search space. Two threads with one atomic op each is +//! roughly the right shape; "2 publishes + 2 subscribers + a drop" +//! already explodes. + +// The proc macro `#[concurrency::test]` expands to `::concurrency::stress(...)`. +// Inside the crate's own integration tests we don't have a `concurrency` Cargo +// alias (cargo rejects self-deps), so alias the crate manually. 
+extern crate dataplane_concurrency as concurrency; + +use concurrency::quiescent::channel; +use concurrency::thread; + +/// A snapshot taken after a publish must observe a value the Publisher +/// ever stored. Under any interleaving of `publish` vs `snapshot`, the +/// Subscriber sees either the initial or the published value, never +/// anything else (no torn reads, no use-after-free). +#[concurrency::test] +fn snapshot_observes_a_legal_value() { + let publisher = channel(0u32); + thread::scope(|s| { + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + let observed = *sub.snapshot(); + assert!( + observed == 0 || observed == 1, + "Subscriber observed illegal value {observed}", + ); + }); + publisher.publish(1u32); + }); +} + +/// A Subscriber that takes a snapshot before the Publisher publishes, +/// then is dropped concurrently with the Publisher's reclaim, must not +/// deadlock and must not leave the protocol in an inconsistent state. +#[concurrency::test] +fn subscriber_drop_during_publish_is_safe() { + let publisher = channel(0u32); + thread::scope(|s| { + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + let _ = *sub.snapshot(); + // Subscriber drops at end of thread; concurrent with publisher below. + }); + publisher.publish(1u32); + publisher.reclaim(); + }); +} + +/// A Subscriber that snapshots after `publish` returns must observe the +/// published value, not the initial. This pins down the +/// publish-then-snapshot ordering. +/// +/// Skipped under `shuttle_pct`: this test is single-threaded by design +/// and PCT specifically panics on closures that don't exercise +/// concurrency. The other backends accept it. 
+#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn snapshot_after_publish_observes_published() { + let publisher = channel(0u32); + let mut sub = publisher.factory().subscriber(); + publisher.publish(1u32); + let observed = *sub.snapshot(); + assert_eq!( + observed, 1, + "snapshot taken after publish() returns must observe the published value", + ); +} + +/// Subscriber registered before publish, snapshot taken after -- should +/// observe the published value. The 0-sentinel branch in +/// `min_observed` must not turn this into a use-after-free. +#[concurrency::test] +fn registered_then_publish_then_snapshot() { + let publisher = channel(0u32); + thread::scope(|s| { + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + // Snapshot may race with publish. Either way, we must see + // a legal value. + let observed = *sub.snapshot(); + assert!(observed == 0 || observed == 1); + }); + publisher.publish(1u32); + publisher.reclaim(); + }); +} + +// ===================================================================== +// Drop affinity: every `Versioned` destructor must run on the +// Publisher's thread. This is the headline guarantee of the crate; +// the existing tests above check legality and absence of deadlocks but +// do not verify the drop-thread invariant under all interleavings. +// ===================================================================== + +/// Payload whose `Drop` records the thread on which it ran. We use +/// `std::sync::Mutex` for the recording slot because the model checker +/// doesn't need to model contention on it (only one drop per +/// `Versioned`, and we only care about the thread id, not the order of +/// records). 
+struct DropMarker { + drops: std::sync::Arc>>, +} + +impl Drop for DropMarker { + fn drop(&mut self) { + self.drops + .lock() + .expect("recording mutex poisoned") + .push(thread::current().id()); + } +} + +/// Verifies the drop-affinity invariant under all interleavings the +/// active backend explores. +/// +/// Setup: Publisher publishes a fresh marker (the initial goes into +/// `retired`) while a Subscriber thread snapshots and then drops. Any +/// interleaving of those two threads must result in **all** +/// `Versioned` destructors running on the Publisher's thread. In +/// particular: the race where Subscriber's `cached = None` decrement +/// of `Versioned`'s strong count and Publisher's `retired.clear()` +/// decrement of the same atomic could (on weak memory) reorder, is +/// enforced by the Acquire fence in `min_observed` after the +/// `Arc::strong_count == 1` check. +#[concurrency::test] +fn destructor_of_initial_runs_on_publisher_thread() { + let drops: std::sync::Arc>> = + std::sync::Arc::new(std::sync::Mutex::new(Vec::new())); + + let initial = DropMarker { + drops: std::sync::Arc::clone(&drops), + }; + let publisher = channel(initial); + + let publisher_thread = thread::current().id(); + + thread::scope(|s| { + // Subscriber thread: snapshot then drop. Race against the + // publisher's publish/reclaim below. + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + let _ = sub.snapshot(); + // sub drops at end of thread; concurrent with publisher. + }); + + // Publisher publishes a new marker (initial goes into retired). + publisher.publish(DropMarker { + drops: std::sync::Arc::clone(&drops), + }); + }); + + // Force a final reclaim pass so retired drains deterministically. + publisher.reclaim(); + drop(publisher); + + // Every recorded drop must have happened on the publisher + // (main) thread. 
+ let recorded = drops.lock().expect("recording mutex poisoned"); + for (i, t) in recorded.iter().enumerate() { + assert_eq!( + *t, publisher_thread, + "DropMarker {i} ran its destructor on {t:?}, \ + not the publisher thread {publisher_thread:?}", + ); + } +} diff --git a/concurrency/tests/scope_property.rs b/concurrency/tests/scope_property.rs new file mode 100644 index 0000000000..a13a6a3e53 --- /dev/null +++ b/concurrency/tests/scope_property.rs @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Bolero property test for `thread::scope`. +//! +//! Generates a [`Plan`] (a small number of spawned threads, each with a +//! small number of `fetch_add` ops on a shared counter) via bolero, +//! then runs each plan under the active backend. Each bolero iteration +//! is one *shape* (spawn count, per-spawn op count); under shuttle each +//! shape gets exercised against one randomly chosen schedule. Many +//! bolero iterations widen both axes cheaply. +//! +//! This is the cheap-per-call counterpart to `tests/loom_scope.rs`'s +//! hand-picked scenarios. Loom-style exhaustive exploration of the +//! shim under a large random plan would blow up; bolero x shuttle gets +//! breadth where loom would only give depth on a tiny case. +//! +//! The headline property is conservation: at `scope()` return, the +//! shared counter must equal the sum of all increments the spawned +//! threads were instructed to perform. If `scope()` returned without +//! joining a thread (loom shim bug), or if any `Drop` running outside +//! the scope clobbered the count, this assertion fires. +//! +//! Loom is deliberately excluded -- the search space explodes with +//! large plans. Use `tests/loom_scope.rs` for loom coverage. 
+ +#![cfg(not(feature = "loom"))] + +use std::panic::RefUnwindSafe; + +use bolero::TypeGenerator; +use dataplane_concurrency::sync::Arc; +use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering}; +use dataplane_concurrency::thread; + +/// One spawned thread's program: a list of increments to perform on +/// the shared counter. Each `u8` is masked to a small range so the +/// test stays cheap under shuttle. +#[derive(Clone, Debug, TypeGenerator)] +struct ThreadPlan { + increments: Vec, +} + +/// A scope's program: up to a few spawned threads. Bolero generates +/// arbitrarily long `Vec` but we clamp to keep search cost +/// bounded inside `run_plan`. +#[derive(Clone, Debug, TypeGenerator)] +struct Plan { + threads: Vec, +} + +const MAX_THREADS: usize = 4; +const MAX_INCREMENTS_PER_THREAD: usize = 4; + +fn expected_sum(plan: &Plan) -> usize { + plan.threads + .iter() + .take(MAX_THREADS) + .map(|tp| { + tp.increments + .iter() + .take(MAX_INCREMENTS_PER_THREAD) + .map(|i| (*i & 0x0f) as usize) + .sum::() + }) + .sum() +} + +fn run_plan(plan: &Plan) { + let counter = Arc::new(AtomicUsize::new(0)); + let expected = expected_sum(plan); + + thread::scope(|s| { + for tp in plan.threads.iter().take(MAX_THREADS) { + let counter_for_thread = Arc::clone(&counter); + let increments: Vec = tp + .increments + .iter() + .take(MAX_INCREMENTS_PER_THREAD) + .copied() + .collect(); + s.spawn(move || { + for inc in &increments { + counter_for_thread.fetch_add((*inc & 0x0f) as usize, Ordering::SeqCst); + } + }); + } + }); + + let observed = counter.load(Ordering::SeqCst); + assert_eq!( + observed, expected, + "scope conservation violated: observed {observed} != expected {expected}", + ); +} + +const TEST_TIME: std::time::Duration = std::time::Duration::from_secs(10); + +fn fuzz_test( + test: impl Fn(Arg) + RefUnwindSafe, +) { + bolero::check!() + .with_type() + .cloned() + .with_test_time(TEST_TIME) + .for_each(test); +} + +#[test] +#[cfg(feature = "shuttle")] +fn 
scope_conservation_under_shuttle() { + fuzz_test(|plan: Plan| shuttle::check_random(move || run_plan(&plan), 1)); +} + +#[test] +#[cfg(feature = "shuttle")] +fn scope_conservation_under_shuttle_pct() { + fuzz_test(|plan: Plan| { + // PCT requires every thread to do at least one atomic op; + // skip degenerate shapes that wouldn't exercise concurrency. + let nontrivial = plan + .threads + .iter() + .take(MAX_THREADS) + .filter(|tp| !tp.increments.is_empty()) + .count(); + if nontrivial < 2 { + return; + } + shuttle::check_pct(move || run_plan(&plan), 16, 3); + }); +} + +#[test] +#[cfg(not(feature = "shuttle"))] +fn scope_conservation_under_std() { + fuzz_test(|plan: Plan| run_plan(&plan)); +} diff --git a/concurrency/tests/stress_dispatch.rs b/concurrency/tests/stress_dispatch.rs new file mode 100644 index 0000000000..aebda65798 --- /dev/null +++ b/concurrency/tests/stress_dispatch.rs @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Tests for `concurrency::stress` backend dispatch. +//! +//! `stress(body)` is the small router that `#[concurrency::test]` +//! expands to: it picks one of `loom::model`, +//! `shuttle::check_random` / `_pct` / `_dfs`, or direct `body()` based +//! on the active backend's feature. The dispatch table lives in +//! `concurrency/src/stress.rs`. +//! +//! This file pins two coarse but important properties: +//! +//! 1. On the default backend, `stress` invokes `body` exactly once. +//! There is no scheduling exploration; the call should round-trip +//! untouched. +//! +//! 2. On `loom` or `shuttle` (random scheduler), `stress` invokes +//! `body` more than once -- the backend explores multiple +//! schedules / interleavings. Exact counts depend on the backend's +//! internal iteration budget and can change; the test only asserts +//! the contract that exploration actually happens. +//! +//! PCT and DFS are skipped: PCT panics on test bodies that do no +//! 
concurrent work *on the main thread*, and DFS returns after a +//! single iteration in the schedule we hand it. Both are valid +//! shuttle schedulers but stricter than `check_random`; the dispatch +//! contract is the same for all three, so verifying it under +//! `shuttle` + `loom` is enough. + +// With the `shuttle_dfs -> shuttle_pct -> shuttle` chain in +// `Cargo.toml`, `not(feature = "shuttle_pct")` is true exactly when +// neither PCT nor DFS is selected. +#![cfg(not(feature = "shuttle_pct"))] + +extern crate dataplane_concurrency as concurrency; + +use std::sync::atomic::{AtomicUsize, Ordering}; + +use concurrency::thread; + +// The invocation counter is a plain `static AtomicUsize`, not a +// `concurrency::sync::*` primitive. Two reasons: +// +// * Under loom / shuttle, `concurrency::sync::*` panics when accessed +// from outside the model checker's execution context (which is +// where the test body itself reads the counter, *after* stress +// returns). +// * A `static` is the simplest thing that works from inside and +// outside the body. The test counts invocations *across* the +// whole `stress()` call, not per-iteration, so contention is fine. +// +// Each test resets the counter to 0 before invoking `stress` so the +// tests don't have hidden coupling. + +fn run_dispatch_check() -> usize { + static INVOCATIONS: AtomicUsize = AtomicUsize::new(0); + INVOCATIONS.store(0, Ordering::SeqCst); + concurrency::stress(|| { + INVOCATIONS.fetch_add(1, Ordering::SeqCst); + // PCT panics on bodies that do no concurrent work, so spawn + // one thread that performs one atomic op via the active + // backend's primitives. 
+ let scratch = concurrency::sync::Arc::new(concurrency::sync::atomic::AtomicUsize::new(0)); + let scratch_for_thread = concurrency::sync::Arc::clone(&scratch); + thread::scope(|s| { + s.spawn(move || { + scratch_for_thread.fetch_add(1, concurrency::sync::atomic::Ordering::SeqCst); + }); + }); + }); + INVOCATIONS.load(Ordering::SeqCst) +} + +#[test] +#[cfg(not(any(feature = "loom", feature = "shuttle")))] +fn default_backend_invokes_body_exactly_once() { + let invocations = run_dispatch_check(); + assert_eq!( + invocations, 1, + "default-backend stress should invoke body exactly once", + ); +} + +#[test] +#[cfg(any(feature = "loom", feature = "shuttle"))] +fn model_check_backend_invokes_body_more_than_once() { + let invocations = run_dispatch_check(); + assert!( + invocations > 1, + "model-check backend stress should invoke body more than once \ + (exploring schedules); observed {invocations}", + ); +} + +// `#[concurrency::test]` emits `#[::core::prelude::v1::test]` BEFORE +// the captured `#(#attrs)*`. These two tests pin that user-supplied +// `#[should_panic]` / `#[ignore]` attributes still attach to the +// synthesised function -- a future macro refactor that reorders the +// emitted attributes (or swallows them) breaks here loudly instead +// of silently turning real test signals into no-ops. 
+ +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +#[should_panic(expected = "intentional")] +fn should_panic_attribute_attaches() { + panic!("intentional"); +} + +#[cfg(not(any(feature = "loom", feature = "shuttle_pct")))] +#[concurrency::test] +#[ignore = "verifies #[ignore] threads through; not run by default"] +fn ignore_attribute_attaches() { + panic!("test body must not run when #[ignore] is honoured"); +} diff --git a/concurrency/tests/thread_scope.rs b/concurrency/tests/thread_scope.rs new file mode 100644 index 0000000000..6586b3fcdc --- /dev/null +++ b/concurrency/tests/thread_scope.rs @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Direct coverage for `concurrency::thread::scope` -- the loom shim +//! in particular, but the tests pass under every backend. +//! +//! Loom 0.7 does not ship `thread::scope`. The crate provides one in +//! `concurrency/src/thread/loom_scope.rs` built on `loom::spawn` plus +//! an `Arc>>` keepalive pattern that preserves the +//! drop-affinity guarantee `std::thread::scope` offers. +//! +//! The shim is exercised indirectly by `tests/quiescent_model.rs`, but +//! those tests would surface failures as quiescent-protocol bugs rather +//! than as localised shim bugs. The tests in this file pin the +//! `thread::scope` contract itself so a future regression in the shim +//! fails here loudly and at the right layer. +//! +//! The same source runs under every backend via `#[concurrency::test]`, +//! and on the default and shuttle backends it exercises the *real* +//! `std::thread::scope` / `shuttle::thread::scope` -- which is the +//! point: the contract is the same; only the *internals* differ. +//! +//! Run under loom (the headline use case) with: +//! +//! ```sh +//! cargo test --release -p dataplane-concurrency --features loom --test thread_scope +//! 
``` + +extern crate dataplane_concurrency as concurrency; + +use concurrency::sync::Arc; +use concurrency::sync::atomic::{AtomicUsize, Ordering}; +use concurrency::thread; + +// Several tests below have the spawn-and-wait shape ("main spawns, +// joins via the implicit auto-join, reads only after scope returns"), +// which PCT counts as "the main thread did no concurrent work" and +// panics on. Same approach `quiescent_model.rs` takes for its +// single-threaded `snapshot_after_publish_observes_published` test. +// Tests with two or more spawns issuing atomic ops (e.g. +// `multiple_spawns_all_join_before_return`) are PCT-compatible. + +/// `scope()` returns the body's value. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn scope_returns_body_value() { + let v = thread::scope(|_| 42u32); + assert_eq!(v, 42); +} + +/// A single spawned thread is joined before `scope()` returns; the +/// `AtomicUsize` it wrote is visible to the caller (Acquire on join). +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn single_spawn_joins_before_return() { + let counter = Arc::new(AtomicUsize::new(0)); + let counter_for_thread = Arc::clone(&counter); + thread::scope(|s| { + s.spawn(move || { + counter_for_thread.fetch_add(1, Ordering::SeqCst); + }); + }); + assert_eq!(counter.load(Ordering::SeqCst), 1); +} + +/// Multiple spawned threads all join before `scope()` returns. +#[concurrency::test] +fn multiple_spawns_all_join_before_return() { + let counter = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let c1 = Arc::clone(&counter); + s.spawn(move || { + c1.fetch_add(1, Ordering::SeqCst); + }); + let c2 = Arc::clone(&counter); + s.spawn(move || { + c2.fetch_add(1, Ordering::SeqCst); + }); + }); + assert_eq!(counter.load(Ordering::SeqCst), 2); +} + +/// `ScopedJoinHandle::join` returns the spawned thread's value. 
+#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn explicit_join_returns_value() { + thread::scope(|s| { + let h = s.spawn(|| 99u32); + let v = h.join().expect("spawned thread did not panic"); + assert_eq!(v, 99); + }); +} + +/// Spawned closures may borrow data of any lifetime that outlives the +/// scope -- the headline `std::thread::scope` guarantee. Under loom +/// this is the shim's `mem::transmute` doing its job. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn spawn_can_borrow_from_enclosing_scope() { + let counter = Arc::new(AtomicUsize::new(0)); + // `local` is owned by the test body; it lives in the enclosing + // stack frame. The spawn closure borrows it by reference, which + // would not compile on plain `thread::spawn` (no `'static`). + let local = 7u32; + let local_ref = &local; + thread::scope(|s| { + let c = Arc::clone(&counter); + s.spawn(move || { + c.store(*local_ref as usize, Ordering::SeqCst); + }); + }); + assert_eq!(counter.load(Ordering::SeqCst), 7); +} + +/// Two spawns in the same scope, each writing a distinct value, both +/// readable after `scope()` returns. Loom explores all interleavings of +/// the two stores; under any of them, both values are eventually +/// observed because both joins happen before `scope` returns. +#[concurrency::test] +fn two_spawns_independent_writes() { + let a = Arc::new(AtomicUsize::new(0)); + let b = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let a_for = Arc::clone(&a); + s.spawn(move || { + a_for.store(1, Ordering::SeqCst); + }); + let b_for = Arc::clone(&b); + s.spawn(move || { + b_for.store(2, Ordering::SeqCst); + }); + }); + assert_eq!(a.load(Ordering::SeqCst), 1); + assert_eq!(b.load(Ordering::SeqCst), 2); +} + +/// A scoped thread that itself calls `s.spawn(...)` on the parent +/// scope pushes new entries onto the scope's `pending` queue after +/// the parent thread has already entered the teardown drain. 
The +/// shim must keep draining until the queue stays empty across a full +/// pass; otherwise the nested spawn's `JoinHandle` is leaked and the +/// `'scope` -> `'static` transmute is unsound (the closure outlives +/// `'scope`). +#[concurrency::test] +fn nested_scoped_spawn_is_joined() { + let outer_done = Arc::new(AtomicUsize::new(0)); + let inner_done = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let outer_for_thread = Arc::clone(&outer_done); + let inner_for_thread = Arc::clone(&inner_done); + s.spawn(move || { + // Re-enter `s` from inside an already-spawned scoped + // thread. The handle for this inner spawn is registered + // in the same `Scope`'s `pending` list, but it can land + // there after the parent thread has already taken a + // snapshot of `pending` to drain. The shim's teardown + // must keep looping until `pending` is empty across a + // full pass. + s.spawn(move || { + inner_for_thread.fetch_add(1, Ordering::SeqCst); + }); + outer_for_thread.fetch_add(1, Ordering::SeqCst); + }); + }); + assert_eq!( + outer_done.load(Ordering::SeqCst), + 1, + "outer scoped thread did not run to completion before scope returned", + ); + assert_eq!( + inner_done.load(Ordering::SeqCst), + 1, + "nested scoped thread did not run to completion before scope returned", + ); +} + +/// `Drop::drop` of a value moved into a spawned closure runs (at the +/// latest) when the spawned thread is joined -- i.e. before `scope()` +/// returns. Pinned via an `AtomicUsize` incremented from within the +/// payload's `Drop` impl. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn moved_value_drop_runs_before_scope_returns() { + struct Bump(Arc); + impl Drop for Bump { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::SeqCst); + } + } + let bumps = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let payload = Bump(Arc::clone(&bumps)); + s.spawn(move || { + // Body consumes `payload` implicitly at end of scope. 
+ let _keep = payload; + }); + }); + assert_eq!(bumps.load(Ordering::SeqCst), 1); +} diff --git a/dpdk-sys/build.rs b/dpdk-sys/build.rs index e0c5a219c1..d204a7562d 100644 --- a/dpdk-sys/build.rs +++ b/dpdk-sys/build.rs @@ -93,6 +93,7 @@ fn main() { "rte_hash", "rte_rcu", "rte_ring", + "rte_acl", "rte_eal", "rte_argparse", "rte_kvargs", diff --git a/dpdk/Cargo.toml b/dpdk/Cargo.toml index 32b186c177..fdee013955 100644 --- a/dpdk/Cargo.toml +++ b/dpdk/Cargo.toml @@ -11,6 +11,7 @@ serde = ["dep:serde"] [dependencies] +concurrency = { workspace = true } dpdk-sys = { workspace = true } errno = { workspace = true } net = { workspace = true } @@ -21,3 +22,9 @@ tracing = { workspace = true, features = ["attributes"] } [build-dependencies] dpdk-sysroot-helper = { workspace = true } + +[dev-dependencies] +id = { workspace = true } + +bolero = { workspace = true, default-features = false, features = ["std"] } +nix = { workspace = true, features = ["sched"] } diff --git a/dpdk/src/acl/classify.rs b/dpdk/src/acl/classify.rs new file mode 100644 index 0000000000..37539ee6f1 --- /dev/null +++ b/dpdk/src/acl/classify.rs @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! ACL classification algorithm selection. +//! +//! DPDK provides multiple SIMD-accelerated implementations of its ACL classification engine. +//! The [`ClassifyAlgorithm`] enum exposes these as a safe Rust type that can be used with +//! [`AclContext::classify_with_algorithm`][super::context::AclContext] or +//! [`AclContext::set_default_algorithm`][super::context::AclContext]. +//! +//! In most cases [`ClassifyAlgorithm::Default`] is the right choice -- DPDK will automatically +//! select the best implementation for the current CPU at build time. Explicit selection is useful +//! for benchmarking or for targeting a specific code path. 
+ +use core::fmt::{self, Display, Formatter}; + +// --------------------------------------------------------------------------- +// ClassifyAlgorithm +// --------------------------------------------------------------------------- + +/// SIMD implementation to use for ACL classification. +/// +/// Maps 1:1 to the `RTE_ACL_CLASSIFY_*` constants in +/// [`rte_acl_classify_alg`][mod@dpdk_sys::rte_acl_classify_alg]. +/// +/// # Platform support +/// +/// Not every variant is available on every CPU. Requesting an unsupported algorithm will result +/// in an error from [`rte_acl_classify_alg`][fn@dpdk_sys::rte_acl_classify_alg] or +/// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify]. +/// [`Default`][ClassifyAlgorithm::Default] is always available and is recommended unless you have +/// a specific reason to select a particular implementation. +#[repr(u32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Default)] +pub enum ClassifyAlgorithm { + /// Let DPDK choose the best available implementation for the current CPU. + /// + /// This is almost always what you want. + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_DEFAULT`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_DEFAULT]. + /// + /// # Asymmetry between the two DPDK entry points + /// + /// `Default` carries different meaning across DPDK's two algorithm-selection paths: + /// + /// - [`rte_acl_set_ctx_classify(ctx, DEFAULT)`][dpdk_sys::rte_acl_set_ctx_classify] + /// expands `DEFAULT` to the best available implementation on the current + /// CPU (this is the "DPDK choose best" semantics). + /// - [`rte_acl_classify_alg(ctx, ..., DEFAULT)`][fn@dpdk_sys::rte_acl_classify_alg] + /// indexes table slot 0 in the dispatch table, which is the scalar + /// implementation -- *not* "DPDK choose best". 
+ /// + /// To make `Default` mean the same thing through either Rust entry point, + /// [`AclContext::classify_with_algorithm`][super::context::AclContext::classify_with_algorithm] + /// special-cases `Default` to dispatch via + /// [`rte_acl_classify`][dpdk_sys::rte_acl_classify] (which uses the + /// context's currently-set algorithm) instead of through + /// `rte_acl_classify_alg`. Use [`Scalar`][ClassifyAlgorithm::Scalar] + /// explicitly if you want the scalar implementation. + #[default] + Default = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_DEFAULT, + + /// Portable scalar (non-SIMD) implementation. + /// + /// Available on all platforms. Useful as a baseline for benchmarks. + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_SCALAR`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SCALAR]. + Scalar = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SCALAR, + + /// SSE 4.1 vectorized implementation. + /// + /// Requires x86-64 SSE 4.1 support. + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_SSE`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SSE]. + Sse = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_SSE, + + /// AVX2 vectorized implementation. + /// + /// Requires x86-64 AVX2 support. + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_AVX2`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX2]. + Avx2 = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX2, + + /// ARM NEON vectorized implementation. + /// + /// Requires AArch64 NEON support. + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_NEON`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_NEON]. + Neon = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_NEON, + + /// PowerPC AltiVec vectorized implementation. + /// + /// Requires PowerPC AltiVec / VMX support. + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_ALTIVEC`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_ALTIVEC]. 
+ Altivec = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_ALTIVEC, + + /// AVX-512 vectorized implementation processing 16 flows in parallel. + /// + /// Requires x86-64 AVX-512 support (specifically AVX-512BW). + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_AVX512X16`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X16]. + Avx512x16 = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X16, + + /// AVX-512 vectorized implementation processing 32 flows in parallel. + /// + /// Requires x86-64 AVX-512 support (specifically AVX-512BW). + /// + /// Corresponds to + /// [`RTE_ACL_CLASSIFY_AVX512X32`][dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X32]. + Avx512x32 = dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX512X32, +} + +impl ClassifyAlgorithm { + /// Convert to the raw `u32` discriminant value expected by the DPDK C API. + #[must_use] + #[inline] + pub const fn as_u32(self) -> u32 { + self as u32 + } + + /// Attempt to parse a raw `u32` into a [`ClassifyAlgorithm`]. + /// + /// Returns `None` if the value does not correspond to a known algorithm. + /// See also the [`TryFrom`] impl, which is the same operation framed as the + /// idiomatic conversion trait. + #[must_use] + pub const fn from_u32(value: u32) -> Option { + match value { + x if x == Self::Default as u32 => Some(Self::Default), + x if x == Self::Scalar as u32 => Some(Self::Scalar), + x if x == Self::Sse as u32 => Some(Self::Sse), + x if x == Self::Avx2 as u32 => Some(Self::Avx2), + x if x == Self::Neon as u32 => Some(Self::Neon), + x if x == Self::Altivec as u32 => Some(Self::Altivec), + x if x == Self::Avx512x16 as u32 => Some(Self::Avx512x16), + x if x == Self::Avx512x32 as u32 => Some(Self::Avx512x32), + _ => None, + } + } + + /// Returns `true` if this is an x86-64 specific algorithm variant. 
+ #[must_use] + pub const fn is_x86_64(&self) -> bool { + matches!( + self, + Self::Sse | Self::Avx2 | Self::Avx512x16 | Self::Avx512x32 + ) + } + + /// Returns `true` if this is an ARM specific algorithm variant. + #[must_use] + pub const fn is_aarch64(&self) -> bool { + matches!(self, Self::Neon) + } + + /// Returns `true` if this is a PowerPC specific algorithm variant. + #[must_use] + pub const fn is_powerpc(&self) -> bool { + matches!(self, Self::Altivec) + } + + /// Returns `true` if this is a platform-independent variant. + #[must_use] + pub const fn is_portable(&self) -> bool { + matches!(self, Self::Default | Self::Scalar) + } +} + +impl Display for ClassifyAlgorithm { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::Default => write!(f, "Default"), + Self::Scalar => write!(f, "Scalar"), + Self::Sse => write!(f, "SSE"), + Self::Avx2 => write!(f, "AVX2"), + Self::Neon => write!(f, "NEON"), + Self::Altivec => write!(f, "AltiVec"), + Self::Avx512x16 => write!(f, "AVX-512 (x16)"), + Self::Avx512x32 => write!(f, "AVX-512 (x32)"), + } + } +} + +impl From for dpdk_sys::rte_acl_classify_alg::Type { + #[inline] + fn from(alg: ClassifyAlgorithm) -> Self { + alg.as_u32() + } +} + +/// Unknown algorithm discriminant returned by [`ClassifyAlgorithm::try_from`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] +#[error("unknown rte_acl_classify_alg discriminant {0}")] +pub struct UnknownClassifyAlgorithm(pub u32); + +impl TryFrom for ClassifyAlgorithm { + type Error = UnknownClassifyAlgorithm; + fn try_from(value: u32) -> Result { + Self::from_u32(value).ok_or(UnknownClassifyAlgorithm(value)) + } +} + +// --------------------------------------------------------------------------- +// Compile-time assertions +// --------------------------------------------------------------------------- + +/// Verify that our enum discriminants match the DPDK constants exactly. 
+const _: () = { + use dpdk_sys::rte_acl_classify_alg::*; + + assert!(ClassifyAlgorithm::Default as u32 == RTE_ACL_CLASSIFY_DEFAULT); + assert!(ClassifyAlgorithm::Scalar as u32 == RTE_ACL_CLASSIFY_SCALAR); + assert!(ClassifyAlgorithm::Sse as u32 == RTE_ACL_CLASSIFY_SSE); + assert!(ClassifyAlgorithm::Avx2 as u32 == RTE_ACL_CLASSIFY_AVX2); + assert!(ClassifyAlgorithm::Neon as u32 == RTE_ACL_CLASSIFY_NEON); + assert!(ClassifyAlgorithm::Altivec as u32 == RTE_ACL_CLASSIFY_ALTIVEC); + assert!(ClassifyAlgorithm::Avx512x16 as u32 == RTE_ACL_CLASSIFY_AVX512X16); + assert!(ClassifyAlgorithm::Avx512x32 as u32 == RTE_ACL_CLASSIFY_AVX512X32); +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_is_zero() { + assert_eq!(ClassifyAlgorithm::Default.as_u32(), 0); + assert_eq!(ClassifyAlgorithm::default(), ClassifyAlgorithm::Default); + } + + #[test] + fn round_trip_all_variants() { + let variants = [ + ClassifyAlgorithm::Default, + ClassifyAlgorithm::Scalar, + ClassifyAlgorithm::Sse, + ClassifyAlgorithm::Avx2, + ClassifyAlgorithm::Neon, + ClassifyAlgorithm::Altivec, + ClassifyAlgorithm::Avx512x16, + ClassifyAlgorithm::Avx512x32, + ]; + for variant in variants { + let raw = variant.as_u32(); + let parsed = ClassifyAlgorithm::from_u32(raw); + assert_eq!(parsed, Some(variant), "round-trip failed for {variant}"); + } + } + + #[test] + fn from_u32_rejects_unknown() { + assert_eq!(ClassifyAlgorithm::from_u32(99), None); + assert_eq!(ClassifyAlgorithm::from_u32(u32::MAX), None); + } + + #[test] + fn display_all_variants() { + let display_strings = [ + (ClassifyAlgorithm::Default, "Default"), + (ClassifyAlgorithm::Scalar, "Scalar"), + (ClassifyAlgorithm::Sse, "SSE"), + (ClassifyAlgorithm::Avx2, "AVX2"), + (ClassifyAlgorithm::Neon, "NEON"), + (ClassifyAlgorithm::Altivec, "AltiVec"), + 
(ClassifyAlgorithm::Avx512x16, "AVX-512 (x16)"), + (ClassifyAlgorithm::Avx512x32, "AVX-512 (x32)"), + ]; + for (variant, expected) in display_strings { + assert_eq!(format!("{variant}"), expected); + } + } + + #[test] + fn platform_classification() { + assert!(ClassifyAlgorithm::Default.is_portable()); + assert!(ClassifyAlgorithm::Scalar.is_portable()); + + assert!(ClassifyAlgorithm::Sse.is_x86_64()); + assert!(ClassifyAlgorithm::Avx2.is_x86_64()); + assert!(ClassifyAlgorithm::Avx512x16.is_x86_64()); + assert!(ClassifyAlgorithm::Avx512x32.is_x86_64()); + + assert!(ClassifyAlgorithm::Neon.is_aarch64()); + assert!(ClassifyAlgorithm::Altivec.is_powerpc()); + + // Cross-checks: portable variants should not be platform-specific. + assert!(!ClassifyAlgorithm::Default.is_x86_64()); + assert!(!ClassifyAlgorithm::Default.is_aarch64()); + assert!(!ClassifyAlgorithm::Default.is_powerpc()); + + // Platform-specific variants should not be portable. + assert!(!ClassifyAlgorithm::Sse.is_portable()); + assert!(!ClassifyAlgorithm::Neon.is_portable()); + assert!(!ClassifyAlgorithm::Altivec.is_portable()); + } + + #[test] + fn into_dpdk_type() { + let alg = ClassifyAlgorithm::Avx2; + let raw: dpdk_sys::rte_acl_classify_alg::Type = alg.into(); + assert_eq!(raw, dpdk_sys::rte_acl_classify_alg::RTE_ACL_CLASSIFY_AVX2); + } + + /// All known discriminants -- the universe `from_u32` must accept and the + /// universe `as_u32` round-trips through. + const KNOWN: &[ClassifyAlgorithm] = &[ + ClassifyAlgorithm::Default, + ClassifyAlgorithm::Scalar, + ClassifyAlgorithm::Sse, + ClassifyAlgorithm::Avx2, + ClassifyAlgorithm::Neon, + ClassifyAlgorithm::Altivec, + ClassifyAlgorithm::Avx512x16, + ClassifyAlgorithm::Avx512x32, + ]; + + /// Property: for every `u32`, `from_u32` either round-trips through `as_u32` + /// (when the value is a known discriminant) or rejects with `None` (when it + /// is not). Generalises the hand-rolled `round_trip_all_variants` test over + /// the entire `u32` domain. 
+ #[test] + fn from_u32_round_trip_property() { + bolero::check!().with_type::().for_each( + |value: &u32| match ClassifyAlgorithm::from_u32(*value) { + Some(alg) => assert_eq!( + alg.as_u32(), + *value, + "from_u32({value}) -> {alg:?} but {alg:?}.as_u32() = {}", + alg.as_u32() + ), + None => { + for variant in KNOWN { + assert_ne!( + variant.as_u32(), + *value, + "from_u32({value}) returned None but {variant:?} has that discriminant" + ); + } + } + }, + ); + } + + /// Property: `TryFrom` matches `from_u32` exactly. + #[test] + fn try_from_matches_from_u32() { + bolero::check!().with_type::().for_each(|value: &u32| { + let opt = ClassifyAlgorithm::from_u32(*value); + let res = ClassifyAlgorithm::try_from(*value).ok(); + assert_eq!(opt, res); + }); + } +} diff --git a/dpdk/src/acl/config.rs b/dpdk/src/acl/config.rs new file mode 100644 index 0000000000..b446c303dc --- /dev/null +++ b/dpdk/src/acl/config.rs @@ -0,0 +1,1734 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! ACL configuration types. +//! +//! This module provides safe, validated configuration types for the two main ACL setup calls: +//! +//! - [`AclCreateParams`] -- parameters for creating an ACL context +//! ([`rte_acl_create`][dpdk_sys::rte_acl_create]). +//! - [`AclBuildConfig`]`` -- parameters for compiling rules into runtime lookup structures +//! ([`rte_acl_build`][dpdk_sys::rte_acl_build]). +//! +//! Following the project convention of validating inputs at the boundary, both types perform +//! validation at construction time so that downstream code can assume the configuration is valid. 
+ +use core::ffi::CStr; +use core::fmt::{self, Display}; +use core::marker::PhantomData; +use core::num::NonZero; + +use std::ffi::CString; + +use tracing::debug; + +use crate::socket::SocketId; + +use super::error::InvalidAclName; +use super::field::FieldDef; +use super::rule::Rule; + +// --------------------------------------------------------------------------- +// AclCreateParams +// --------------------------------------------------------------------------- + +/// Validated parameters for creating an ACL context with `N` fields per rule. +/// +/// This is the safe Rust equivalent of [`rte_acl_param`][dpdk_sys::rte_acl_param]. +/// The name is validated at construction time and stored as a [`CString`] for zero-cost FFI. +/// +/// # Why the const generic is on the type, not the constructor +/// +/// `N` lives on the type so that +/// [`AclContext::::new`][super::context::AclContext::new] can require +/// `AclCreateParams` with the **same** `N`. Erasing `N` after construction +/// would let `AclContext::<3>::new(AclCreateParams::<5>::new(...))` compile +/// while DPDK strides through rules at `rule_size = size_of::>()` over +/// `Rule<3>`-sized slots -- the exact OOB read the const generic is meant to +/// rule out. Keeping `N` on the type closes that gap statically and is +/// consistent with how [`AclBuildConfig`] is parameterised. +/// +/// # Construction +/// +/// Use [`AclCreateParams::::new`][AclCreateParams::new] to create a validated instance. +/// +/// ```ignore +/// let params = AclCreateParams::<5>::new("my_acl", SocketId::ANY, NonZero::new(1024).unwrap())?; +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AclCreateParams { + /// Validated ACL context name (ASCII, non-empty, no null bytes, within length limit). + name: CString, + /// NUMA socket on which to allocate the context's memory. + socket_id: SocketId, + /// Maximum number of rules this context can hold. 
Non-zero: a context that + /// cannot hold any rules is useless and `rte_acl_create` rejects it with + /// `EINVAL`. + max_rule_num: NonZero, + /// Size of each rule in bytes -- equal to + /// [`Rule::::RULE_SIZE`][Rule::RULE_SIZE]. Stored as + /// [`NonZero`] because `N > 0` implies `size_of::>() > 0`, + /// and a zero `rule_size` would be rejected by DPDK with `EINVAL`. + rule_size: NonZero, + /// Carries `N` on the type without taking up space. + _phantom: PhantomData<[(); N]>, +} + +/// The maximum length (in bytes, **excluding** the null terminator) of an ACL context name. +/// +/// DPDK's [`RTE_ACL_NAMESIZE`][dpdk_sys::RTE_ACL_NAMESIZE] includes the null terminator, so the +/// usable string length is one less. +pub const MAX_ACL_NAME_LEN: usize = (dpdk_sys::RTE_ACL_NAMESIZE as usize).saturating_sub(1); + +impl AclCreateParams { + /// Compile-time guard: `N == 0` is rejected here so that + /// [`AclContext::<0, _>`][super::context::AclContext] is unconstructable + /// via the public API. Forced to evaluate in `new` via a let-binding. + const _CHECK_N_NONZERO: () = assert!(N > 0, "AclCreateParams requires N > 0"); + + /// Compile-time guard: `N` must not exceed + /// [`MAX_FIELDS`][super::config::MAX_FIELDS] (DPDK's + /// `RTE_ACL_MAX_FIELDS` = 64). Larger `N` would also be rejected by + /// [`AclBuildConfig::new`][super::config::AclBuildConfig::new], but + /// must be rejected **here** first: `Rule::::RULE_SIZE` + /// computes `size_of::>() as u32`, and for very large `N` + /// the cast can wrap to `0`, after which the `NonZero::new_unchecked` + /// below would invoke undefined behaviour. Capping `N` at + /// `MAX_FIELDS` keeps `size_of::>()` well under `u32::MAX` + /// (it is at most 16 + 16 * 64 = 1040 bytes), so the cast is exact + /// and non-zero. + const _CHECK_N_FITS_U32_RULE_SIZE: () = assert!( + N <= MAX_FIELDS, + "AclCreateParams requires N <= RTE_ACL_MAX_FIELDS (64); larger N would \ + truncate size_of::>() during the u32 cast and risk UB." 
+ ); + + /// Create validated ACL creation parameters. + /// + /// `N` (on the type) must match the number of [`FieldDef`] entries that + /// will be used when building the context, as well as the number of + /// fields in every [`Rule`][Rule] added to the context. It is used + /// here to compute the `rule_size` that DPDK requires at creation time. + /// + /// # Arguments + /// + /// * `name` -- human-readable name for the context. Must be non-empty ASCII without null + /// bytes, at most [`MAX_ACL_NAME_LEN`] bytes long. + /// * `socket_id` -- the NUMA socket to allocate memory on. Use [`SocketId::ANY`] if you don't + /// have a preference. + /// * `max_rule_num` -- the maximum number of rules this context will hold. + /// Non-zero by type; a context that cannot hold any rules has no use and + /// DPDK rejects it with `EINVAL`. + /// + /// # Compile-time checks + /// + /// `N == 0` is rejected by `_CHECK_N_NONZERO`; `N > MAX_FIELDS` is + /// rejected by `_CHECK_N_FITS_U32_RULE_SIZE`. Both are evaluated at + /// monomorphisation time via let-bindings in this function. + /// + /// # Errors + /// + /// Returns [`InvalidAclName`] if the name fails validation. + #[cold] + #[tracing::instrument(level = "debug", skip(name), fields(name = name.as_ref()))] + pub fn new( + name: impl AsRef, + socket_id: SocketId, + max_rule_num: NonZero, + ) -> Result { + // Force evaluation of both const assertions for this monomorphisation. + let () = Self::_CHECK_N_NONZERO; + let () = Self::_CHECK_N_FITS_U32_RULE_SIZE; + + let name = Self::validate_name(name.as_ref())?; + // `Rule::::RULE_SIZE == size_of::>() as u32`. The + // two const assertions above guarantee `0 < N <= MAX_FIELDS`, + // so `size_of::>()` is in `[28, 1040]` -- well under + // `u32::MAX`, and certainly non-zero. The `unreachable!()` + // arm is therefore dead; we surface it as a panic rather than + // `unsafe { new_unchecked }` so that a broken invariant + // faults loudly instead of being undefined behaviour. 
+ let rule_size = match NonZero::new(Rule::::RULE_SIZE) { + Some(nz) => nz, + None => unreachable!(), + }; + debug!( + "Created ACL params: name={}, socket_id={:?}, max_rule_num={}, rule_size={}", + name.to_str().unwrap_or(""), + socket_id, + max_rule_num, + rule_size, + ); + Ok(Self { + name, + socket_id, + max_rule_num, + rule_size, + _phantom: PhantomData, + }) + } + + /// Validate and convert an ACL context name to a [`CString`]. + #[cold] + fn validate_name(name: &str) -> Result { + if name.is_empty() { + return Err(InvalidAclName::Empty); + } + if !name.is_ascii() { + return Err(InvalidAclName::NotAscii); + } + if name.len() > MAX_ACL_NAME_LEN { + return Err(InvalidAclName::TooLong { + len: name.len(), + max: MAX_ACL_NAME_LEN, + }); + } + CString::new(name).map_err(|_| InvalidAclName::ContainsNullBytes) + } + + /// Get the context name as a `&str`. + #[must_use] + pub fn name(&self) -> &str { + // SAFETY: The name is validated as ASCII at construction time and therefore is + // also valid UTF-8. `self.name` is a `CString`, so `to_bytes()` excludes the + // trailing NUL. + unsafe { core::str::from_utf8_unchecked(self.name.to_bytes()) } + } + + /// Get the name as a [`CString`] reference, suitable for FFI. + #[must_use] + pub fn name_cstr(&self) -> &CStr { + &self.name + } + + /// Get the NUMA socket preference. + #[must_use] + pub fn socket_id(&self) -> SocketId { + self.socket_id + } + + /// Get the maximum rule count. + #[must_use] + pub fn max_rule_num(&self) -> NonZero { + self.max_rule_num + } + + /// Get the per-rule byte size. + /// + /// This was computed from the const generic `N` at construction time and equals + /// `core::mem::size_of::>()`. Non-zero by type since `N > 0`. + #[must_use] + pub fn rule_size(&self) -> NonZero { + self.rule_size + } + + /// Build the raw DPDK [`rte_acl_param`][dpdk_sys::rte_acl_param], borrowed from `self`. 
+ /// + /// The returned [`RawParams`] holds a `rte_acl_param` whose `name` pointer is + /// borrowed from `self.name`. The lifetime on [`RawParams`] ties the raw + /// struct to `&self`, preventing use-after-free if `self` is dropped before + /// the FFI call completes. + pub(crate) fn to_raw(&self) -> RawParams<'_> { + // Cast rationale for `socket_id`: + // + // [`SocketId`] wraps a `c_uint`, but DPDK's + // [`rte_acl_param`][dpdk_sys::rte_acl_param] field is `c_int`. + // The cast is exact for the two value classes that ever appear in + // a valid `SocketId`: + // + // - [`SocketId::ANY`][crate::socket::SocketId::ANY] is defined as + // `c_uint::MAX`, which two's-complement-casts to `-1` -- + // precisely DPDK's `SOCKET_ID_ANY` sentinel. + // - Real NUMA socket IDs are small non-negative integers + // (`< RTE_MAX_NUMA_NODES`, currently 32), safely representable + // in `c_int`. + // + // No value class produces silent wraparound here, so the `as` + // cast is sound without a runtime check. + RawParams { + raw: dpdk_sys::rte_acl_param { + name: self.name.as_ptr(), + socket_id: self.socket_id.as_c_uint() as core::ffi::c_int, + rule_size: self.rule_size.get(), + max_rule_num: self.max_rule_num.get(), + }, + _borrow: PhantomData, + } + } +} + +/// A [`rte_acl_param`][dpdk_sys::rte_acl_param] that borrows its name pointer +/// from an owning [`AclCreateParams`]. +/// +/// The lifetime parameter ensures that the FFI struct cannot outlive the +/// [`AclCreateParams`] that owns the underlying C string. Use [`as_ptr`] to +/// pass the raw pointer into a DPDK call. +/// +/// [`as_ptr`]: RawParams::as_ptr +pub(crate) struct RawParams<'a> { + raw: dpdk_sys::rte_acl_param, + _borrow: PhantomData<&'a CStr>, +} + +impl RawParams<'_> { + /// Get a pointer to the raw [`rte_acl_param`][dpdk_sys::rte_acl_param]. + /// + /// The pointer is valid for as long as `self` lives. 
+ #[inline] + pub(crate) fn as_ptr(&self) -> *const dpdk_sys::rte_acl_param { + &self.raw + } +} + +impl Display for AclCreateParams { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "AclCreateParams<{N}> {{ name: \"{}\", socket_id: {:?}, max_rule_num: {}, rule_size: {} }}", + self.name(), + self.socket_id, + self.max_rule_num, + self.rule_size, + ) + } +} + +// --------------------------------------------------------------------------- +// AclBuildConfig +// --------------------------------------------------------------------------- + +/// Maximum number of categories that can be used in an ACL context. +/// +/// Corresponds to [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES]. +pub const MAX_CATEGORIES: u32 = dpdk_sys::RTE_ACL_MAX_CATEGORIES; + +/// The required alignment factor for the number of categories. +/// +/// The `num_categories` value must be either `1` or a multiple of this value. +/// +/// Corresponds to [`RTE_ACL_RESULTS_MULTIPLIER`][dpdk_sys::RTE_ACL_RESULTS_MULTIPLIER]. +pub const RESULTS_MULTIPLIER: u32 = dpdk_sys::RTE_ACL_RESULTS_MULTIPLIER; + +/// Maximum number of fields per ACL rule. +/// +/// Corresponds to [`RTE_ACL_MAX_FIELDS`][dpdk_sys::RTE_ACL_MAX_FIELDS]. +pub const MAX_FIELDS: usize = dpdk_sys::RTE_ACL_MAX_FIELDS as usize; + +/// Validated build configuration for compiling ACL rules into runtime lookup structures. +/// +/// This is the safe Rust equivalent of [`rte_acl_config`][dpdk_sys::rte_acl_config]. +/// +/// The const generic `N` must match the `N` used in the [`AclContext`][super::context::AclContext] +/// and in the [`Rule`]`` type. This is enforced by the type system -- the +/// [`build`][super::context::AclContext::build] method requires an `AclBuildConfig` with the same +/// `N` as the context. 
+/// +/// # Validation +/// +/// The constructor validates: +/// - `N <= 64` ([`RTE_ACL_MAX_FIELDS`][dpdk_sys::RTE_ACL_MAX_FIELDS]) +/// - `num_categories` is between 1 and [`MAX_CATEGORIES`] (inclusive) +/// - `num_categories` is 1 or a multiple of [`RESULTS_MULTIPLIER`] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AclBuildConfig { + /// Number of categories to build with. + /// + /// Must be in `1..=`[`MAX_CATEGORIES`] and either `1` or a multiple of + /// [`RESULTS_MULTIPLIER`]. + num_categories: u32, + + /// Field definitions -- one per field in the rule. + /// + /// The order and semantics of these definitions must match the order of + /// [`AclField`][super::rule::AclField] entries in the [`Rule`]`` instances added to the + /// context. + field_defs: [FieldDef; N], + + /// Maximum memory size (in bytes) for the compiled runtime structures. + /// + /// Set to `0` to impose no limit. + max_size: usize, + + /// Cached output of [`min_input_size`][AclBuildConfig::min_input_size]. + /// + /// Computed once at construction; constant for the lifetime of the + /// config since `field_defs` cannot be mutated after `new` returns. + /// Avoids O(N^2) re-computation on every classify-time pre-flight. + min_input_size: usize, +} + +/// Errors that can occur when constructing an [`AclBuildConfig`]. +#[derive(Debug, thiserror::Error, Copy, Clone, PartialEq, Eq)] +pub enum InvalidAclBuildConfig { + /// `N` exceeds [`RTE_ACL_MAX_FIELDS`][dpdk_sys::RTE_ACL_MAX_FIELDS]. + #[error("Too many fields: {num_fields} exceeds maximum of {max}")] + TooManyFields { + /// The number of fields that was requested. + num_fields: usize, + /// The maximum allowed. + max: usize, + }, + /// One of the [`FieldDef`] entries has `field_index >= N`. DPDK uses + /// `field_index` to look up each definition's value in the rule's field + /// array; an out-of-range index would read past `Rule`. 
+ #[error( + "FieldDef.field_index {field_index} is out of range for N = {n} \ + (valid range: 0..{n})" + )] + FieldIndexOutOfRange { + /// The offending index. + field_index: u8, + /// The const-generic field count. + n: usize, + }, + /// Two [`FieldDef`] entries share the same `field_index`. Field indices + /// must be unique within the array. + #[error("FieldDef.field_index {field_index} appears more than once")] + DuplicateFieldIndex { + /// The duplicated index. + field_index: u8, + }, + /// The first field definition does not match DPDK's requirements for + /// the trie's entry byte. + /// + /// DPDK requires the first field in `field_defs` to be **one byte + /// long**; it consumes that byte during trie construction. The + /// wrapper additionally requires `input_index = 0` on the first + /// field as a convention -- it labels the entry byte as belonging + /// to the first input-index group, which simplifies the + /// runtime-load reasoning in `min_input_size`. + /// + /// The first field's **`offset` is unconstrained**: a non-zero + /// offset just means the input buffer has leading bytes before the + /// ACL key, and DPDK loads from `field_defs[0].offset` regardless. + /// `min_input_size` accounts for non-zero leading offsets via the + /// per-group load-endpoint formula. + #[error( + "the first FieldDef must be size = One and input_index = 0 \ + (got size = {size:?}, input_index = {input_index})" + )] + InvalidFirstField { + /// The first field's declared size. + size: super::field::FieldSize, + /// The first field's declared `input_index`. + input_index: u8, + }, + + /// `num_categories` is zero. + #[error("Number of categories must be at least 1")] + ZeroCategories, + + /// `num_categories` exceeds [`MAX_CATEGORIES`]. + #[error("Number of categories {num_categories} exceeds maximum of {max}")] + TooManyCategories { + /// The requested number of categories. + num_categories: u32, + /// The maximum allowed. 
+ max: u32, + }, + + /// `num_categories` is greater than 1 and not a multiple of [`RESULTS_MULTIPLIER`]. + #[error("Number of categories {num_categories} must be 1 or a multiple of {multiplier}")] + CategoriesNotAligned { + /// The requested number of categories. + num_categories: u32, + /// The required alignment factor. + multiplier: u32, + }, + + /// A field whose `(offset, size)` extends past its `input_index` + /// group's 4-byte window. DPDK's classify loop loads exactly 4 + /// contiguous bytes per `input_index` group starting at the group's + /// lowest offset; any field spilling past that window would make + /// DPDK read bytes the caller never accounted for, undermining the + /// `min_input_size` safety contract. + #[error( + "FieldDef with input_index {input_index} spans beyond a 4-byte \ + window: lowest offset is {group_offset}, but field_index \ + {field_index} extends to offset {extent_end} (max allowed: \ + {window_end})" + )] + InvalidInputIndexGrouping { + /// The offending `input_index`. + input_index: u8, + /// The lowest `offset` of any field in the group. + group_offset: u32, + /// The `field_index` of the field whose extent overruns the window. + field_index: u8, + /// `offset + size` of the offending field. + extent_end: u32, + /// `group_offset + 4`. + window_end: u32, + }, + + /// `input_index` 0 contains more than just the first field. + /// + /// DPDK reserves `input_index = 0` for the single 1-byte first field + /// (the trie entry byte). No other field may share that group. + #[error( + "input_index 0 must contain only the first FieldDef, but \ + field_index {extra_field_index} also has input_index 0" + )] + ExtraFieldInFirstGroup { + /// The `field_index` of the second field sharing `input_index = 0`. + extra_field_index: u8, + }, + + /// A non-first `input_index` group does not cover exactly 4 contiguous + /// bytes. 
DPDK's runtime loads 4 bytes per group; gaps or overlaps in + /// the field coverage of a group would either let DPDK read past the + /// declared fields or build a trie node with inconsistent semantics. + #[error( + "input_index {input_index} group does not cover exactly 4 \ + contiguous bytes starting at offset {group_offset} \ + (coverage bitmask within the window: {coverage_mask:#06b}, \ + expected 0b1111)" + )] + InputIndexGroupCoverage { + /// The offending `input_index`. + input_index: u8, + /// The lowest `offset` of any field in the group. + group_offset: u32, + /// 4-bit mask of which bytes in `[group_offset, group_offset+4)` + /// are covered by some field in the group. `0b1111` is the + /// expected value. + coverage_mask: u8, + }, + + /// Two fields in the same `input_index` group overlap in the bytes + /// they cover. DPDK requires each byte in a group to be claimed by + /// at most one field. + #[error( + "input_index {input_index} group: field_index {field_index} \ + overlaps another field in the same group (overlap mask: \ + {overlap_mask:#06b})" + )] + OverlappingFieldsInGroup { + /// The offending `input_index`. + input_index: u8, + /// The `field_index` of the field that introduced the overlap. + field_index: u8, + /// 4-bit mask of the overlapping bytes within the group window. + overlap_mask: u8, + }, + + /// A field's `offset + size` (or its `input_index` group's + /// `group_offset + 4`) overflows `u32`. DPDK loads from those offsets + /// at classify time, and `min_input_size()` would have to report at + /// least that endpoint -- but a `u32` cannot represent it, which + /// would let a caller satisfy the documented buffer-size precondition + /// while DPDK still reads past the end. We reject such layouts at + /// construction time. + #[error( + "field_index {field_index} extent overflows u32: \ + offset={offset}, size={size_bytes}, would extend past u32::MAX" + )] + FieldExtentOverflow { + /// The offending `field_index`. 
+ field_index: u8, + /// The field's offset. + offset: u32, + /// The field's size in bytes. + size_bytes: u8, + }, + + /// Fields sharing an `input_index` are not contiguous in the array. + /// + /// DPDK's `acl_build_index` records each group's data-index entry at + /// the **first occurrence** of the input_index in definition order. + /// If fields with the same `input_index` are interleaved with other + /// groups, the wrapper's `min_input_size` calculation (which assumes + /// the first occurrence is also the group's load offset) can diverge + /// from DPDK's actual load position, undermining the safety contract. + /// We require all fields sharing an `input_index` to be consecutive + /// in the `field_defs` array. + #[error( + "input_index {input_index} fields are not contiguous in the \ + field_defs array: field at array position {position} has \ + input_index {input_index} but a different input_index appeared \ + between this field and an earlier sibling" + )] + NonContiguousInputIndexGroup { + /// The offending `input_index`. + input_index: u8, + /// The array position of the field that resumed the group. + position: usize, + }, + + /// Within a contiguous `input_index` group, the fields are not in + /// strictly-ascending offset order. + /// + /// DPDK's `acl_build_index` uses the offset of the **first** field + /// in each group (in definition order) as the group's load address. + /// Requiring offset-ascending order within each group makes that + /// first occurrence also the lowest offset, so the wrapper's + /// `min_input_size` (computed from `min(offset) per group`) matches + /// DPDK's actual load position. + #[error( + "input_index {input_index} group: field at array position \ + {position} has offset {offset}, which is not strictly greater \ + than the previous field's offset {previous_offset}" + )] + GroupFieldsNotOffsetOrdered { + /// The offending `input_index`. + input_index: u8, + /// The array position of the out-of-order field. 
+        position: usize,
+        /// The offending field's offset.
+        offset: u32,
+        /// The previous (in-group) field's offset.
+        previous_offset: u32,
+    },
+}
+
+impl<const N: usize> AclBuildConfig<N> {
+    /// Compile-time guard: `N == 0` is rejected at monomorphization so
+    /// `AclBuildConfig::<0>::new` fails to compile. Mirrors the symmetric
+    /// guards on [`Rule`][super::rule::Rule] and [`AclCreateParams`].
+    const _CHECK_N_NONZERO: () = assert!(N > 0, "AclBuildConfig requires N > 0");
+
+    /// Compile-time guard: `N` must not exceed
+    /// [`MAX_FIELDS`][super::config::MAX_FIELDS].
+    ///
+    /// Mirrors the same guard on [`AclCreateParams`] so that an
+    /// out-of-range `N` is rejected uniformly across the two configuration
+    /// types -- without this, `AclBuildConfig<65>` would compile and only
+    /// fall over at runtime in `AclBuildConfig::new`'s `TooManyFields`
+    /// branch. Forced to evaluate in `new` via a let-binding.
+    const _CHECK_N_FITS_MAX_FIELDS: () = assert!(
+        N <= MAX_FIELDS,
+        "AclBuildConfig requires N <= RTE_ACL_MAX_FIELDS (64)"
+    );
+
+    /// Create a validated build configuration.
+    ///
+    /// # Arguments
+    ///
+    /// * `num_categories` -- the number of result categories. Must be in
+    ///   `1..=`[`MAX_CATEGORIES`] and either `1` or a multiple of [`RESULTS_MULTIPLIER`].
+    /// * `field_defs` -- the field definitions for the rule layout (one per field).
+    /// * `max_size` -- maximum memory (in bytes) for compiled structures, or `0` for no limit.
+    ///
+    /// # Validation scope
+    ///
+    /// This constructor checks:
+    ///
+    /// - **First field shape**: size = 1, `input_index` = 0 (DPDK's
+    ///   trie-entry-byte contract). `offset` is unconstrained -- the
+    ///   first field may sit at any byte position in the input buffer,
+    ///   and [`min_input_size`][AclBuildConfig::min_input_size] accounts
+    ///   for leading bytes via the per-group load-endpoint formula.
+    ///   See [`InvalidFirstField`][InvalidAclBuildConfig::InvalidFirstField]
+    ///   for the precise contract.
+    /// - **`field_index` invariants**: every `field_index` is `< N`, all
+    ///   values are unique.
+    /// - **`input_index = 0` group**: contains only the first field (no
+    ///   other field may share `input_index = 0`).
+    /// - **Definition order**: fields sharing an `input_index` appear
+    ///   consecutively in `field_defs`, and offsets are strictly
+    ///   ascending within each such run.
+    /// - **Extent overflow**: for every field, neither `offset + size`
+    ///   nor `offset + 4` overflows `u32`.
+    /// - **Non-first `input_index` groups**: the union of fields sharing
+    ///   the group's `input_index` covers **exactly 4 contiguous bytes**
+    ///   with no overlaps -- matches DPDK's runtime 4-byte-per-group
+    ///   load pattern. This is the load-bearing safety check for the
+    ///   [`min_input_size`][AclBuildConfig::min_input_size] contract.
+    /// - **Categories**: `num_categories` is in `1..=MAX_CATEGORIES` and
+    ///   either `1` or a multiple of `RESULTS_MULTIPLIER`.
+    ///
+    /// An `Ok` from this constructor does **not** imply a successful build
+    /// at DPDK time -- DPDK may still reject the config for reasons we do
+    /// not pre-check (e.g. excessive trie size with `max_size > 0`). But
+    /// every reason the wrapper accepts a config corresponds to a layout
+    /// whose `classify`-time loads stay within
+    /// [`min_input_size`][AclBuildConfig::min_input_size] bytes.
+    ///
+    /// [`AclBuildError::InvalidConfig`]: super::error::AclBuildError::InvalidConfig
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`InvalidAclBuildConfig`] variant describing the first
+    /// check that fails; checks run in the order they appear in the body.
+    #[cold]
+    #[tracing::instrument(level = "debug")]
+    pub fn new(
+        num_categories: u32,
+        field_defs: [FieldDef; N],
+        max_size: usize,
+    ) -> Result<Self, InvalidAclBuildConfig> {
+        // Force evaluation of both const assertions for this monomorphisation.
+        // `_CHECK_N_FITS_MAX_FIELDS` makes `N > MAX_FIELDS` a compile error,
+        // so the runtime branch below is unreachable for any properly
+        // monomorphised call; we keep the runtime check as a defence-in-depth
+        // (and to surface a typed `TooManyFields` error rather than a panic
+        // for cases where the const-assert is bypassed).
+        let () = Self::_CHECK_N_NONZERO;
+        let () = Self::_CHECK_N_FITS_MAX_FIELDS;
+
+        if N > MAX_FIELDS {
+            return Err(InvalidAclBuildConfig::TooManyFields {
+                num_fields: N,
+                max: MAX_FIELDS,
+            });
+        }
+        if num_categories == 0 {
+            return Err(InvalidAclBuildConfig::ZeroCategories);
+        }
+        if num_categories > MAX_CATEGORIES {
+            return Err(InvalidAclBuildConfig::TooManyCategories {
+                num_categories,
+                max: MAX_CATEGORIES,
+            });
+        }
+        if num_categories > 1 && !num_categories.is_multiple_of(RESULTS_MULTIPLIER) {
+            return Err(InvalidAclBuildConfig::CategoriesNotAligned {
+                num_categories,
+                multiplier: RESULTS_MULTIPLIER,
+            });
+        }
+
+        // First field: DPDK requires size = 1 (the trie's entry byte),
+        // and the wrapper additionally requires input_index = 0 so that
+        // the entry byte sits in its own input-index group (see the
+        // grouping validator below). `offset` is unconstrained -- it
+        // simply describes where in the input buffer the entry byte
+        // lives; `min_input_size` accounts for any leading bytes.
+        // N > 0 has been checked above, so field_defs[0] is safe to index.
+        let first = &field_defs[0];
+        if !matches!(first.size(), super::field::FieldSize::One) || first.input_index() != 0 {
+            return Err(InvalidAclBuildConfig::InvalidFirstField {
+                size: first.size(),
+                input_index: first.input_index(),
+            });
+        }
+
+        // Every field_index must be < N (DPDK uses it to index the rule's
+        // field array, so an out-of-range value reads past `Rule<N>`) and
+        // unique.
+        // O(N^2) duplicate check is fine: N <= RTE_ACL_MAX_FIELDS = 64.
+        for (i, def) in field_defs.iter().enumerate() {
+            let fi = def.field_index();
+            if (fi as usize) >= N {
+                return Err(InvalidAclBuildConfig::FieldIndexOutOfRange {
+                    field_index: fi,
+                    n: N,
+                });
+            }
+            for later in &field_defs[i + 1..] {
+                if later.field_index() == fi {
+                    return Err(InvalidAclBuildConfig::DuplicateFieldIndex { field_index: fi });
+                }
+            }
+        }
+
+        // No other field may share input_index = 0; that group is reserved
+        // for the 1-byte first field.
+        for def in &field_defs[1..] {
+            if def.input_index() == 0 {
+                return Err(InvalidAclBuildConfig::ExtraFieldInFirstGroup {
+                    extra_field_index: def.field_index(),
+                });
+            }
+        }
+
+        // Validate definition-order shape:
+        //
+        // 1. Fields with the same `input_index` must appear consecutively
+        //    in `field_defs` (no interleaving with other groups). DPDK's
+        //    `acl_build_index` walks defs in array order and records a
+        //    new data-index slot whenever input_index changes; an
+        //    interleaving caller would create two separate data-index
+        //    slots for the same logical group, breaking the
+        //    `min_input_size` calculation.
+        //
+        // 2. Within each contiguous run, offsets must be strictly
+        //    ascending. DPDK uses the first field's offset (in array
+        //    order) as the group's load address; requiring
+        //    offset-ascending order makes that first field also the
+        //    lowest-offset field, so our `min_input_size` (computed from
+        //    `min(offset)` per group) matches DPDK's actual load.
+        //
+        // We track each input_index's "already closed" status via a
+        // 256-entry flag table: once a different input_index is observed
+        // after we've started one, the closed flag for that one is set
+        // and a later re-occurrence is an error. Indexed by
+        // `input_index`, which fits in u8 (i.e. 0..=255).
+        let mut closed = [false; 256];
+        let mut current_input_index: Option<(u8, u32)> = None; // (input_index, last_offset_seen)
+        for (pos, def) in field_defs.iter().enumerate() {
+            let ii = def.input_index();
+            let offset = def.offset();
+            match current_input_index {
+                Some((open_ii, last_offset)) if open_ii == ii => {
+                    // Still inside the same group; verify offset > last_offset.
+                    if offset <= last_offset {
+                        return Err(InvalidAclBuildConfig::GroupFieldsNotOffsetOrdered {
+                            input_index: ii,
+                            position: pos,
+                            offset,
+                            previous_offset: last_offset,
+                        });
+                    }
+                    current_input_index = Some((ii, offset));
+                }
+                Some((open_ii, _)) => {
+                    // Group `open_ii` is now closed; start `ii` if it
+                    // hasn't already been closed.
+                    closed[open_ii as usize] = true;
+                    if closed[ii as usize] {
+                        return Err(InvalidAclBuildConfig::NonContiguousInputIndexGroup {
+                            input_index: ii,
+                            position: pos,
+                        });
+                    }
+                    current_input_index = Some((ii, offset));
+                }
+                None => {
+                    current_input_index = Some((ii, offset));
+                }
+            }
+        }
+
+        // Reject any field whose extent (`offset + size`) or whose
+        // group-load endpoint (`offset + 4`) would overflow `u32`.
+        // `min_input_size` reports a `usize` derived from these
+        // endpoints; if the u32 arithmetic saturates, the reported
+        // bound understates DPDK's actual read extent and the safety
+        // contract is broken.
+        for def in &field_defs {
+            let size_bytes = def.size() as u8 as u32;
+            if def.offset().checked_add(size_bytes).is_none()
+                || def.offset().checked_add(4).is_none()
+            {
+                return Err(InvalidAclBuildConfig::FieldExtentOverflow {
+                    field_index: def.field_index(),
+                    offset: def.offset(),
+                    size_bytes: def.size() as u8,
+                });
+            }
+        }
+
+        // Validate the input_index grouping rule for non-first groups:
+        // every field sharing an input_index > 0 must fit inside a 4-byte
+        // window starting at the group's lowest offset, and the union of
+        // all fields in the group must cover **exactly** those 4 bytes
+        // with no overlap. DPDK loads 4 contiguous bytes per group at the
+        // group_offset; a sub-4-byte covered region would leave loaded
+        // bytes unattributed to any field (incorrect trie traversal), and
+        // an overlap would build a trie node with inconsistent semantics.
+        //
+        // O(N^2) again; N <= MAX_FIELDS = 64. Coverage tracked as a 4-bit
+        // mask within the group window (bit i means "byte at group_offset + i").
+        // The overflow check above means `offset + size` and `group_offset
+        // + 4` no longer need saturation; they fit in u32 by construction.
+        for def in &field_defs {
+            let ii = def.input_index();
+            if ii == 0 {
+                continue; // already handled above
+            }
+            // group_offset = min(field.offset for field where input_index == ii)
+            let mut group_offset = def.offset();
+            for other in &field_defs {
+                if other.input_index() == ii && other.offset() < group_offset {
+                    group_offset = other.offset();
+                }
+            }
+            let extent_end = def.offset() + def.size() as u8 as u32;
+            let window_end = group_offset + 4;
+            if extent_end > window_end {
+                return Err(InvalidAclBuildConfig::InvalidInputIndexGrouping {
+                    input_index: ii,
+                    group_offset,
+                    field_index: def.field_index(),
+                    extent_end,
+                    window_end,
+                });
+            }
+        }
+        // Second pass: each non-first input_index group must cover exactly
+        // 4 contiguous bytes via the union of its fields, with no overlap.
+        // We iterate inputs once, dedup'ing by tracking the first
+        // appearance of each input_index.
+        for (anchor_idx, anchor) in field_defs.iter().enumerate() {
+            let ii = anchor.input_index();
+            if ii == 0 {
+                continue;
+            }
+            // Process this input_index only at its first occurrence.
+            if field_defs[..anchor_idx]
+                .iter()
+                .any(|prev| prev.input_index() == ii)
+            {
+                continue;
+            }
+            // group_offset = min(field.offset for field in group)
+            let group_offset = field_defs
+                .iter()
+                .filter(|d| d.input_index() == ii)
+                .map(|d| d.offset())
+                .min()
+                .unwrap_or(anchor.offset());
+            // Accumulate the 4-bit coverage mask; reject overlaps.
+            let mut mask: u8 = 0;
+            for d in field_defs.iter().filter(|d| d.input_index() == ii) {
+                let shift = (d.offset() - group_offset) as u8;
+                let size_bits = d.size() as u8;
+                let field_mask = ((1u8 << size_bits) - 1) << shift;
+                let overlap = mask & field_mask;
+                if overlap != 0 {
+                    return Err(InvalidAclBuildConfig::OverlappingFieldsInGroup {
+                        input_index: ii,
+                        field_index: d.field_index(),
+                        overlap_mask: overlap,
+                    });
+                }
+                mask |= field_mask;
+            }
+            if mask != 0b1111 {
+                return Err(InvalidAclBuildConfig::InputIndexGroupCoverage {
+                    input_index: ii,
+                    group_offset,
+                    coverage_mask: mask,
+                });
+            }
+        }
+
+        // Memoize the safety-critical buffer-size requirement. All
+        // grouping invariants have been validated above, so this loop is
+        // sound and the result is constant for the lifetime of the
+        // config.
+        let min_input_size = Self::compute_min_input_size(&field_defs);
+
+        debug!(
+            "Created ACL build config: num_categories={num_categories}, num_fields={N}, max_size={max_size}, min_input_size={min_input_size}",
+        );
+
+        Ok(Self {
+            num_categories,
+            field_defs,
+            max_size,
+            min_input_size,
+        })
+    }
+
+    /// Compute the buffer-size requirement at construction time.
+    ///
+    /// See [`min_input_size`][AclBuildConfig::min_input_size] for the
+    /// formula and rationale. Factored out so that `new` can call it
+    /// once and cache the result; the public accessor returns the cached
+    /// value.
+    ///
+    /// Precondition: all fields' `offset + 4` fit in `u32`. This is
+    /// guaranteed by the `FieldExtentOverflow` check in
+    /// [`new`][AclBuildConfig::new], so the plain `+` below cannot
+    /// overflow.
+ fn compute_min_input_size(field_defs: &[FieldDef; N]) -> usize { + let mut max_load_end: u32 = 0; + for def in field_defs { + let ii = def.input_index(); + let mut group_offset = def.offset(); + for other in field_defs { + if other.input_index() == ii && other.offset() < group_offset { + group_offset = other.offset(); + } + } + // No saturation: `new`'s FieldExtentOverflow check has + // already verified `def.offset() + 4 <= u32::MAX` for every + // def, and `group_offset <= def.offset()`. + let load_end = group_offset + 4; + if load_end > max_load_end { + max_load_end = load_end; + } + } + max_load_end as usize + } + + /// Get the number of categories. + #[must_use] + pub fn num_categories(&self) -> u32 { + self.num_categories + } + + /// Get the field definitions. + #[must_use] + pub fn field_defs(&self) -> &[FieldDef; N] { + &self.field_defs + } + + /// Get the maximum memory size for compiled structures. + #[must_use] + pub fn max_size(&self) -> usize { + self.max_size + } + + /// The minimum size, in bytes, that an input buffer passed to + /// [`classify`][super::context::AclContext::classify] must be valid for. + /// + /// DPDK's classify loop does **not** read one field at a time at the + /// field's `offset`; it performs 4-byte aligned loads where each load's + /// starting offset is the lowest `FieldDef.offset` within an + /// `input_index` group. For every distinct `input_index` value the + /// buffer must therefore be valid for reads in + /// `[group_offset, group_offset + 4)`. This function returns the + /// maximum `group_offset + 4` across all `input_index` groups. + /// + /// The grouping invariant validated by [`new`][AclBuildConfig::new] + /// (every field's `offset + size` fits within its group's 4-byte + /// window) guarantees that this value is also at least + /// `max(field.offset + field.size)`. 
+ /// + /// Callers of the unsafe [`classify`][super::context::AclContext::classify] + /// API should size their input buffers to at least this value to avoid + /// out-of-bounds reads. + /// + /// Computed and cached at [`new`][AclBuildConfig::new] time; + /// returning the cached value is O(1). + #[must_use] + pub fn min_input_size(&self) -> usize { + self.min_input_size + } + + /// Convert to the raw DPDK [`rte_acl_config`][dpdk_sys::rte_acl_config]. + /// + /// The returned struct is fully owned and has no lifetime dependency on `self`. + /// + /// # Stack footprint + /// + /// `rte_acl_config::defs` is a fixed-size C array of + /// `RTE_ACL_MAX_FIELDS` (= [`MAX_FIELDS`] = 64) entries -- about 0.5 KiB + /// on the stack at 8 bytes per `rte_acl_field_def`. Build is a cold + /// path, so the size is acceptable; we materialise the full array + /// because DPDK's `rte_acl_build` reads `defs[0..num_fields]` and + /// ignores entries beyond `num_fields`, but the array storage itself + /// must be present. + pub(crate) fn to_raw(&self) -> dpdk_sys::rte_acl_config { + let mut defs = [dpdk_sys::rte_acl_field_def::default(); MAX_FIELDS]; + for (i, def) in self.field_defs.iter().enumerate() { + defs[i] = dpdk_sys::rte_acl_field_def::from(def); + } + dpdk_sys::rte_acl_config { + num_categories: self.num_categories, + num_fields: N as u32, + defs, + max_size: self.max_size, + } + } +} + +impl Display for AclBuildConfig { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "AclBuildConfig<{N}> {{ num_categories: {}, max_size: {} }}", + self.num_categories, self.max_size, + ) + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::acl::field::{FieldSize, FieldType}; + + /// Test-local shorthand: build a `NonZero` from a literal that we know is non-zero. 
+ fn nz(value: u32) -> NonZero { + NonZero::new(value).expect("test literal is non-zero") + } + + // -- AclCreateParams name validation -- + + #[test] + fn valid_name_accepted() { + let result = AclCreateParams::<5>::new("my_acl_ctx", SocketId::ANY, nz(1024)); + assert!(result.is_ok()); + let params = result.unwrap(); + assert_eq!(params.name(), "my_acl_ctx"); + } + + #[test] + fn empty_name_rejected() { + let result = AclCreateParams::<1>::new("", SocketId::ANY, nz(128)); + assert!(matches!(result, Err(InvalidAclName::Empty))); + } + + #[test] + fn non_ascii_name_rejected() { + // Three-character non-ASCII string (U+65E5 U+672C U+8A9E). Spelled + // out via escapes rather than a literal so source stays ASCII-only. + let result = AclCreateParams::<1>::new("\u{65E5}\u{672C}\u{8A9E}", SocketId::ANY, nz(128)); + assert!(matches!(result, Err(InvalidAclName::NotAscii))); + } + + #[test] + fn too_long_name_rejected() { + // MAX_ACL_NAME_LEN is RTE_ACL_NAMESIZE - 1 = 31 + let long_name: String = "a".repeat(MAX_ACL_NAME_LEN + 1); + let result = AclCreateParams::<1>::new(&long_name, SocketId::ANY, nz(128)); + assert!(matches!(result, Err(InvalidAclName::TooLong { .. 
}))); + } + + #[test] + fn max_length_name_accepted() { + let name: String = "a".repeat(MAX_ACL_NAME_LEN); + let result = AclCreateParams::<1>::new(&name, SocketId::ANY, nz(128)); + assert!(result.is_ok()); + } + + #[test] + fn name_with_null_byte_rejected() { + let result = AclCreateParams::<1>::new("hello\0world", SocketId::ANY, nz(128)); + assert!(matches!(result, Err(InvalidAclName::ContainsNullBytes))); + } + + #[test] + fn rule_size_matches_generic() { + let params = AclCreateParams::<5>::new("test", SocketId::ANY, nz(128)).unwrap(); + assert_eq!( + params.rule_size().get() as usize, + core::mem::size_of::>() + ); + } + + #[test] + fn to_raw_preserves_values() { + let params = AclCreateParams::<3>::new("raw_test", SocketId::ANY, nz(256)).unwrap(); + let raw_params = params.to_raw(); + // SAFETY: raw_params borrows from `params`, which is alive in this scope. + let raw = unsafe { *raw_params.as_ptr() }; + // Name pointer should point to the same C string data. + let raw_name = unsafe { CStr::from_ptr(raw.name) }; + assert_eq!(raw_name.to_str().unwrap(), "raw_test"); + assert_eq!(raw.max_rule_num, 256); + assert_eq!(raw.rule_size as usize, core::mem::size_of::>()); + } + + #[test] + fn display_contains_name() { + let params = AclCreateParams::<1>::new("display_test", SocketId::ANY, nz(10)).unwrap(); + let s = format!("{params}"); + assert!(s.contains("display_test"), "got: {s}"); + } + + // -- AclBuildConfig validation -- + + /// Build a valid `[FieldDef; N]` with the DPDK first-field-is-one-byte + /// constraint satisfied. 
+ fn sample_field_defs() -> [FieldDef; N] { + core::array::from_fn(|i| { + if i == 0 { + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0) + } else { + FieldDef::new( + FieldType::Mask, + FieldSize::Four, + i as u8, + i as u8, + (i * 4) as u32, + ) + } + }) + } + + #[test] + fn valid_build_config_single_category() { + let cfg = AclBuildConfig::new(1, sample_field_defs::<5>(), 0); + assert!(cfg.is_ok()); + let cfg = cfg.unwrap(); + assert_eq!(cfg.num_categories(), 1); + assert_eq!(cfg.max_size(), 0); + assert_eq!(cfg.field_defs().len(), 5); + } + + #[test] + fn valid_build_config_multiple_categories() { + let cfg = AclBuildConfig::new(4, sample_field_defs::<3>(), 1024); + assert!(cfg.is_ok()); + assert_eq!(cfg.unwrap().num_categories(), 4); + } + + #[test] + fn zero_categories_rejected() { + let result = AclBuildConfig::new(0, sample_field_defs::<1>(), 0); + assert!(matches!(result, Err(InvalidAclBuildConfig::ZeroCategories))); + } + + #[test] + fn too_many_categories_rejected() { + let result = AclBuildConfig::new(MAX_CATEGORIES + 1, sample_field_defs::<1>(), 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::TooManyCategories { .. }) + )); + } + + #[test] + fn max_categories_accepted() { + let result = AclBuildConfig::new(MAX_CATEGORIES, sample_field_defs::<1>(), 0); + assert!(result.is_ok()); + } + + #[test] + fn misaligned_categories_rejected() { + // 3 is > 1 but not a multiple of RESULTS_MULTIPLIER (4) + let result = AclBuildConfig::new(3, sample_field_defs::<1>(), 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::CategoriesNotAligned { .. }) + )); + } + + #[test] + fn to_raw_build_config_preserves_fields() { + // Two 2-byte Range fields in input_index 1 (offsets 4 and 6) fill + // bytes [4, 8) exactly -- a valid grouping under the strict rule. 
+ let defs: [FieldDef; 3] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Range, FieldSize::Two, 1, 1, 4), + FieldDef::new(FieldType::Range, FieldSize::Two, 2, 1, 6), + ]; + let cfg = AclBuildConfig::new(1, defs, 4096).unwrap(); + let raw = cfg.to_raw(); + assert_eq!(raw.num_categories, 1); + assert_eq!(raw.num_fields, 3); + assert_eq!(raw.max_size, 4096); + assert_eq!(raw.defs[0].type_, FieldType::Bitmask as u8); + assert_eq!(raw.defs[0].size, FieldSize::One as u8); + assert_eq!(raw.defs[0].offset, 0); + assert_eq!(raw.defs[1].type_, FieldType::Range as u8); + assert_eq!(raw.defs[1].size, FieldSize::Two as u8); + assert_eq!(raw.defs[1].offset, 4); + assert_eq!(raw.defs[2].type_, FieldType::Range as u8); + assert_eq!(raw.defs[2].size, FieldSize::Two as u8); + assert_eq!(raw.defs[2].offset, 6); + } + + #[test] + fn build_config_display() { + let cfg = AclBuildConfig::new(4, sample_field_defs::<3>(), 0).unwrap(); + let s = format!("{cfg}"); + assert!(s.contains("AclBuildConfig<3>"), "got: {s}"); + assert!(s.contains("num_categories: 4"), "got: {s}"); + } + + // Note: there is no runtime `zero_fields_rejected` test. N == 0 is + // rejected at compile time by the `_CHECK_N_NONZERO` const assertion on + // `AclBuildConfig`, so `AclBuildConfig::<0>::new(1, [], 0)` would + // fail to monomorphize. + + #[test] + fn first_field_invalid_rejected() { + // First field is Four bytes -- must be One. + let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Mask, FieldSize::Four, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::InvalidFirstField { + size: FieldSize::Four, + input_index: 0, + }) + )); + } + + #[test] + fn field_index_out_of_range_rejected() { + // N = 2 but field_index = 5 on the second def -- DPDK would index + // past Rule<2> when looking up the field value. 
+ let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 5, 1, 4), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::FieldIndexOutOfRange { + field_index: 5, + n: 2 + }) + )); + } + + #[test] + fn duplicate_field_index_rejected() { + // Both defs declare field_index = 0. + let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 0, 1, 4), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::DuplicateFieldIndex { field_index: 0 }) + )); + } + + #[test] + fn invalid_input_index_grouping_rejected() { + // Two fields share input_index 1, but their offsets span more than 4 + // bytes (offset 4 + offset 12 cannot both fit in [4, 8) -- field at + // offset 12 with size 4 extends to offset 16, but the group window + // is [4, 8)). + let defs: [FieldDef; 3] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4), + FieldDef::new(FieldType::Mask, FieldSize::Four, 2, 1, 12), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::InvalidInputIndexGrouping { + input_index: 1, + group_offset: 4, + field_index: 2, + extent_end: 16, + window_end: 8, + }) + )); + } + + #[test] + fn extra_field_in_first_group_rejected() { + // Two fields share input_index 0; only field_defs[0] is allowed there. 
+ let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 0, 4), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::ExtraFieldInFirstGroup { + extra_field_index: 1 + }) + )); + } + + #[test] + fn undersized_group_rejected() { + // input_index 1 has a single 1-byte field; group must cover all 4 + // bytes. Coverage mask would be 0b0001 (just byte 0 of the group). + let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Bitmask, FieldSize::One, 1, 1, 4), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::InputIndexGroupCoverage { + input_index: 1, + group_offset: 4, + coverage_mask: 0b0001, + }) + )); + } + + #[test] + fn overlapping_group_fields_rejected() { + // A 4-byte field at offset 4 followed by a 2-byte field at offset + // 6 -- both in input_index 1. Offsets are strictly ascending + // (passes ordering), but the byte ranges overlap in [6, 8). + let defs: [FieldDef; 3] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4), + FieldDef::new(FieldType::Mask, FieldSize::Two, 2, 1, 6), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::OverlappingFieldsInGroup { input_index: 1, .. }) + )); + } + + #[test] + fn non_contiguous_input_index_group_rejected() { + // input_index 1 is interrupted by input_index 2 and then resumed. 
+ let defs: [FieldDef; 4] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Two, 1, 1, 4), + FieldDef::new(FieldType::Mask, FieldSize::Four, 2, 2, 8), + FieldDef::new(FieldType::Mask, FieldSize::Two, 3, 1, 6), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::NonContiguousInputIndexGroup { + input_index: 1, + position: 3, + }) + )); + } + + #[test] + fn group_fields_not_offset_ordered_rejected() { + // Within input_index 1, the second field has a lower offset than + // the first. Ordering must be strictly ascending. + let defs: [FieldDef; 3] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Two, 1, 1, 6), + FieldDef::new(FieldType::Mask, FieldSize::Two, 2, 1, 4), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::GroupFieldsNotOffsetOrdered { + input_index: 1, + position: 2, + offset: 4, + previous_offset: 6, + }) + )); + } + + #[test] + fn field_extent_overflow_rejected() { + // A 4-byte field at offset = u32::MAX - 2: offset + size = u32::MAX + 2 + // overflows u32. Must be rejected at construction; otherwise + // min_input_size's u32-based computation would saturate and + // understate DPDK's actual read endpoint. + let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, u32::MAX - 2), + ]; + let result = AclBuildConfig::new(1, defs, 0); + assert!(matches!( + result, + Err(InvalidAclBuildConfig::FieldExtentOverflow { + field_index: 1, + offset: o, + size_bytes: 4, + }) if o == u32::MAX - 2 + )); + } + + #[test] + fn min_input_size_uses_group_offsets() { + // input_index 9 group fully covers bytes [100, 104) via a 4-byte + // field. DPDK loads 4 bytes from the group_offset (100), so + // min_input_size must be 104. 
A formula like `input_index * 4 + + // 4` (which earlier wrapper versions used) would compute 40 and + // let DPDK read past the end of an undersized buffer. + let defs: [FieldDef; 2] = [ + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 9, 100), + ]; + let cfg = AclBuildConfig::new(1, defs, 0).expect("config should validate"); + assert_eq!( + cfg.min_input_size(), + 104, + "DPDK loads 4 bytes from group_offset = 100, so min_input_size = 104" + ); + } + + /// Property: `AclCreateParams::new` accepts a name iff it is non-empty + /// ASCII without interior NUL bytes and of length `<= MAX_ACL_NAME_LEN`. + /// Verifies the four error variants are mutually exclusive and that the + /// expected variant is produced for each rejection class. + #[test] + fn create_params_name_validation_property() { + bolero::check!() + .with_type::() + .for_each(|name: &String| { + let result = AclCreateParams::<1>::new(name.as_str(), SocketId::ANY, nz(1)); + match result { + Ok(params) => { + // Name was accepted: must satisfy all preconditions. + assert!(!name.is_empty()); + assert!(name.is_ascii()); + assert!(name.len() <= MAX_ACL_NAME_LEN); + assert!(!name.contains('\0')); + assert_eq!(params.name(), name.as_str()); + } + Err(InvalidAclName::Empty) => assert!(name.is_empty()), + Err(InvalidAclName::NotAscii) => assert!(!name.is_ascii()), + Err(InvalidAclName::TooLong { len, max }) => { + assert_eq!(len, name.len()); + assert_eq!(max, MAX_ACL_NAME_LEN); + assert!(name.len() > MAX_ACL_NAME_LEN); + } + Err(InvalidAclName::ContainsNullBytes) => { + // Reached only after Empty / NotAscii / TooLong checks + // pass, so the name is non-empty ASCII of valid length + // and must contain at least one interior NUL. 
+ assert!(!name.is_empty()); + assert!(name.is_ascii()); + assert!(name.len() <= MAX_ACL_NAME_LEN); + assert!(name.contains('\0')); + } + } + }); + } + + /// Property: `AclBuildConfig::new` accepts `num_categories` iff it is + /// non-zero, within `MAX_CATEGORIES`, and either `1` or a multiple of + /// `RESULTS_MULTIPLIER`. + #[test] + fn build_config_num_categories_validation_property() { + bolero::check!() + .with_type::() + .for_each(|num_categories: &u32| { + let result = AclBuildConfig::new(*num_categories, sample_field_defs::<1>(), 0); + let in_range = *num_categories > 0 && *num_categories <= MAX_CATEGORIES; + let aligned = + *num_categories == 1 || (*num_categories).is_multiple_of(RESULTS_MULTIPLIER); + match result { + Ok(cfg) => { + assert!(in_range); + assert!(aligned); + assert_eq!(cfg.num_categories(), *num_categories); + } + Err(InvalidAclBuildConfig::ZeroCategories) => { + assert_eq!(*num_categories, 0); + } + Err(InvalidAclBuildConfig::TooManyCategories { + num_categories: n, + max, + }) => { + assert_eq!(n, *num_categories); + assert_eq!(max, MAX_CATEGORIES); + assert!(*num_categories > MAX_CATEGORIES); + } + Err(InvalidAclBuildConfig::CategoriesNotAligned { .. }) => { + assert!(in_range); + assert!(!aligned); + } + Err(InvalidAclBuildConfig::TooManyFields { .. }) => { + unreachable!("N=1 cannot trigger TooManyFields") + } + Err(InvalidAclBuildConfig::FieldIndexOutOfRange { .. }) + | Err(InvalidAclBuildConfig::DuplicateFieldIndex { .. }) + | Err(InvalidAclBuildConfig::InvalidFirstField { .. }) + | Err(InvalidAclBuildConfig::ExtraFieldInFirstGroup { .. }) + | Err(InvalidAclBuildConfig::InvalidInputIndexGrouping { .. }) + | Err(InvalidAclBuildConfig::InputIndexGroupCoverage { .. }) + | Err(InvalidAclBuildConfig::OverlappingFieldsInGroup { .. }) + | Err(InvalidAclBuildConfig::NonContiguousInputIndexGroup { .. }) + | Err(InvalidAclBuildConfig::GroupFieldsNotOffsetOrdered { .. }) + | Err(InvalidAclBuildConfig::FieldExtentOverflow { .. 
}) => { + unreachable!( + "sample_field_defs<1> produces a valid layout; field-array errors \ + are not reachable via this test" + ) + } + } + }); + } + + /// Property: `AclBuildConfig::new` accepts a `[FieldDef; N]` iff an + /// independent Rust-side oracle says all wrapper-enforced invariants + /// hold. Bolero generates a fuzzed 32-byte input, deterministically + /// constructs a `[FieldDef; 4]` from it, and checks that both the + /// validator and the oracle agree. + /// + /// The oracle is written from scratch (not copied from the impl) so + /// that a bug in either implementation will produce a disagreement. + /// Specifically catches mistakes in the ordering / contiguity / + /// coverage / overlap logic of [`AclBuildConfig::new`]. + #[test] + fn build_config_field_defs_validation_property() { + const N: usize = 4; + // 8 bytes per FieldDef * 4 fields = 32 bytes of input. + bolero::check!() + .with_type::<[u8; 32]>() + .for_each(|input: &[u8; 32]| { + let defs = field_defs_from_bytes::(input); + let actual = AclBuildConfig::new(1, defs, 0); + let expected_accept = oracle_field_defs_valid::(&defs); + match (expected_accept, actual.as_ref()) { + (true, Ok(_)) | (false, Err(_)) => {} + (true, Err(e)) => { + panic!( + "oracle accepted layout but validator rejected: {e:?}\n defs: {defs:?}" + ); + } + (false, Ok(_)) => { + panic!("oracle rejected layout but validator accepted\n defs: {defs:?}"); + } + } + }); + } + + /// Construct a `[FieldDef; N]` deterministically from raw bytes. + /// Each FieldDef consumes 8 bytes: 1 for field_type, 1 for size, 1 + /// for field_index, 1 for input_index, 4 for offset. + /// + /// `field_type` is the low 2 bits of byte 0, mapping to Mask (0), + /// Range (1), Bitmask (2). Value 3 is biased toward Mask (the + /// most common case) by also mapping it to Mask. + /// + /// `size` is the low 2 bits of byte 1, mapping to One (0/3), Two + /// (1), Four (2). 
+    fn field_defs_from_bytes<const N: usize>(bytes: &[u8]) -> [FieldDef; N] {
+        use crate::acl::field::{FieldSize, FieldType};
+        // Indexing panics if `bytes` is shorter than `8 * N`.
+        core::array::from_fn(|i| {
+            let base = i * 8;
+            let ft = match bytes[base] & 0b11 {
+                0 | 3 => FieldType::Mask,
+                1 => FieldType::Range,
+                2 => FieldType::Bitmask,
+                _ => unreachable!(),
+            };
+            let sz = match bytes[base + 1] & 0b11 {
+                0 | 3 => FieldSize::One,
+                1 => FieldSize::Two,
+                2 => FieldSize::Four,
+                _ => unreachable!(),
+            };
+            let field_index = bytes[base + 2];
+            let input_index = bytes[base + 3];
+            let offset = u32::from_le_bytes([
+                bytes[base + 4],
+                bytes[base + 5],
+                bytes[base + 6],
+                bytes[base + 7],
+            ]);
+            FieldDef::new(ft, sz, field_index, input_index, offset)
+        })
+    }
+
+    /// Independent oracle: returns `true` iff every wrapper-enforced
+    /// invariant on `field_defs` holds. Written from scratch (not
+    /// copied from `AclBuildConfig::new`) so that disagreement with the
+    /// impl pinpoints a bug in one or the other.
+    fn oracle_field_defs_valid<const N: usize>(field_defs: &[FieldDef; N]) -> bool {
+        use crate::acl::field::FieldSize;
+
+        if N == 0 || N > MAX_FIELDS {
+            return false;
+        }
+
+        // First field: size = One, input_index = 0 (offset is unconstrained).
+        let first = &field_defs[0];
+        if !matches!(first.size(), FieldSize::One) {
+            return false;
+        }
+        if first.input_index() != 0 {
+            return false;
+        }
+
+        // field_index < N and unique.
+        let mut seen = [false; 256];
+        for def in field_defs {
+            let fi = def.field_index() as usize;
+            if fi >= N {
+                return false;
+            }
+            if seen[fi] {
+                return false;
+            }
+            seen[fi] = true;
+        }
+
+        // Per-field extent fits in u32 (no `offset + size` or
+        // `offset + 4` overflow).
+        for def in field_defs {
+            let size_bytes = def.size() as u8 as u32;
+            if def.offset().checked_add(size_bytes).is_none()
+                || def.offset().checked_add(4).is_none()
+            {
+                return false;
+            }
+        }
+
+        // No other field shares input_index = 0.
+        for def in &field_defs[1..] {
+            if def.input_index() == 0 {
+                return false;
+            }
+        }
+
+        // Contiguity + intra-group ordering: walk the array, track the
+        // current "open" input_index and the previously-seen offset for
+        // it. When input_index changes, mark the old one closed; if a
+        // later position uses an already-closed input_index, that's a
+        // non-contiguous group.
+        let mut closed = [false; 256];
+        let mut open: Option<(u8, u32)> = None;
+        for def in field_defs {
+            let ii = def.input_index();
+            let off = def.offset();
+            match open {
+                Some((cur_ii, last_off)) if cur_ii == ii => {
+                    if off <= last_off {
+                        return false;
+                    }
+                    open = Some((ii, off));
+                }
+                Some((cur_ii, _)) => {
+                    closed[cur_ii as usize] = true;
+                    if closed[ii as usize] {
+                        return false;
+                    }
+                    open = Some((ii, off));
+                }
+                None => {
+                    open = Some((ii, off));
+                }
+            }
+        }
+
+        // Each non-first input_index group: per-field extent fits in a
+        // 4-byte window from group_offset, total coverage is exactly
+        // 4 bytes with no overlap.
+        //
+        // `saturating_add` here vs. plain `+` in the impl: the impl
+        // gates this check behind the `FieldExtentOverflow` pre-flight
+        // (offsets where `offset + 4` overflows are already rejected),
+        // so plain `+` in the impl is sound. The oracle runs the
+        // overflow check earlier and returns `false` on overflow too,
+        // so this branch is only reached for non-overflowing
+        // arithmetic -- but we keep `saturating_add` here as a
+        // defensive fence so a bug in the oracle's overflow check
+        // would not panic this loop while fuzzing.
+        for def in field_defs {
+            let ii = def.input_index();
+            if ii == 0 {
+                continue;
+            }
+            // group_offset = min offset across the group.
+            let group_offset = field_defs
+                .iter()
+                .filter(|d| d.input_index() == ii)
+                .map(|d| d.offset())
+                .min()
+                .expect("group is non-empty by construction");
+            let extent_end = def.offset().saturating_add(def.size() as u8 as u32);
+            if extent_end > group_offset.saturating_add(4) {
+                return false;
+            }
+        }
+        // Coverage / overlap, processed once per group (at first
+        // occurrence in array order).
+        for (anchor_idx, anchor) in field_defs.iter().enumerate() {
+            let ii = anchor.input_index();
+            if ii == 0 {
+                continue;
+            }
+            if field_defs[..anchor_idx]
+                .iter()
+                .any(|prev| prev.input_index() == ii)
+            {
+                continue;
+            }
+            let group_offset = field_defs
+                .iter()
+                .filter(|d| d.input_index() == ii)
+                .map(|d| d.offset())
+                .min()
+                .expect("group is non-empty");
+            let mut mask: u8 = 0;
+            for d in field_defs.iter().filter(|d| d.input_index() == ii) {
+                let shift = (d.offset() - group_offset) as u8;
+                let size_bits = d.size() as u8;
+                let field_mask = ((1u8 << size_bits) - 1) << shift;
+                if mask & field_mask != 0 {
+                    return false;
+                }
+                mask |= field_mask;
+            }
+            if mask != 0b1111 {
+                return false;
+            }
+        }
+
+        true
+    }
+}
diff --git a/dpdk/src/acl/context.rs b/dpdk/src/acl/context.rs
new file mode 100644
index 0000000000..5e81675734
--- /dev/null
+++ b/dpdk/src/acl/context.rs
@@ -0,0 +1,1217 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! ACL context with typestate lifecycle management.
+//!
+//! This module provides [`AclContext`], a safe RAII wrapper around DPDK's opaque
+//! [`rte_acl_ctx`][dpdk_sys::rte_acl_ctx] handle. The context uses a **typestate** pattern to
+//! enforce the correct lifecycle at compile time:
+//!
+//! ```text
+//! AclContext --build()--> AclContext
+//! ^ |
+//! +----------------reset()------------------+
+//! ```
+//!
+//! - In the [`Configuring`] state you can add rules ([`add_rules`][AclContext::add_rules]) and
+//!   compile them ([`build`][AclContext::build]).
Mutation methods take `&mut self`, which lets +//! the Rust borrow checker enforce DPDK's documented constraint that these operations are **not +//! thread-safe**. +//! +//! - In the [`Built`] state you can classify packets ([`classify`][AclContext::classify]). +//! Classification takes `&self`, which -- combined with the `Sync` implementation -- allows safe +//! concurrent access from multiple threads, matching DPDK's documented thread-safety guarantee +//! for [`rte_acl_classify`][dpdk_sys::rte_acl_classify]. +//! +//! The context is parameterised by a const generic `N` (the number of fields per rule). This +//! same `N` appears in [`Rule`][super::rule::Rule] and +//! [`AclBuildConfig`][super::config::AclBuildConfig], so a field-count mismatch between rules +//! and context is caught at compile time. +//! +//! # RAII +//! +//! When an [`AclContext`] is dropped (in any state), it calls +//! [`rte_acl_free`][dpdk_sys::rte_acl_free] to release all DPDK-managed memory. +//! +//! # Examples +//! +//! See the [module-level documentation][super] for a complete usage example. + +use core::fmt; +use core::mem::ManuallyDrop; +use core::ptr::NonNull; + +use concurrency::sync::{Mutex, OnceLock}; +use errno::Errno; +use tracing::{debug, error, trace}; + +use super::classify::ClassifyAlgorithm; +use super::config::{AclBuildConfig, AclCreateParams}; +use super::error::{ + AclAddRulesError, AclBuildError, AclClassifyError, AclCreateError, AclSetAlgorithmError, +}; +use super::field::FieldDef; +use super::rule::Rule; + +/// Process-wide guard for any operation that touches DPDK's global ACL +/// registry: [`AclContext::new`] (find_existing + create), [`Drop`] for +/// [`AclContext`] (free), and [`dump_all_contexts`] (list dump). +/// +/// DPDK's `rte_acl_create` does not itself fail on duplicate names: it +/// returns the **existing** context pointer for a matching name. 
Without +/// serialization, two threads can both observe +/// `rte_acl_find_existing -> NULL` for the same name, both call +/// `rte_acl_create`, and both receive the same pointer -- producing two +/// [`AclContext`] wrappers that race to free the same DPDK handle on drop. +/// Holding this mutex across the check-and-create sequence closes the TOCTOU. +/// Drop and list-dump take the same lock so the "registry-touching +/// operations are serialized" invariant holds at the wrapper seam. +/// +/// Why [`OnceLock`] rather than a `static` initializer: under the +/// `loom`/`shuttle` model-checker backends, `concurrency::sync::Mutex::new` +/// is not `const fn` (each instance registers with the scheduler), so a +/// `static M: Mutex<()> = Mutex::new(())` would fail to typecheck on those +/// configurations. `OnceLock` + lazy init is the portable idiom across +/// all backends. See the module docs on `concurrency::sync`. +/// +/// Why the concurrency facade rather than [`std::sync::Mutex`] directly: +/// the workspace policy is poison-as-panic ("poison is a fatal invariant +/// violation"); the facade applies that policy uniformly so call sites +/// never see `LockResult`. +/// +/// # Tracing reentrancy +/// +/// The lock is **not** reentrant. Anything that runs while a thread holds +/// this lock -- including `tracing` layers invoked by the [`debug!`] / +/// [`error!`] / `#[tracing::instrument]` macros sprinkled through the +/// surrounding methods -- must not call back into any ACL wrapper API that +/// would re-acquire it: [`AclContext::new`], [`dump_all_contexts`], or +/// dropping any [`AclContext`]. Doing so deadlocks the calling thread on +/// its own previously-acquired guard. The default `tracing-subscriber` +/// configuration never touches ACL, but custom layers (e.g. one that +/// resolves the context name from a registry lookup for log enrichment) +/// could trip this if added later. 
+static ACL_CREATE_LOCK: OnceLock> = OnceLock::new(); + +/// Lazy accessor for [`ACL_CREATE_LOCK`]. +fn acl_create_lock() -> &'static Mutex<()> { + ACL_CREATE_LOCK.get_or_init(|| Mutex::new(())) +} + +// --------------------------------------------------------------------------- +// Typestate markers +// --------------------------------------------------------------------------- + +/// Typestate: the context is accepting rule mutations and has not yet been compiled. +/// +/// Methods available in this state: +/// - [`add_rules`][AclContext::add_rules] (`&mut self`) +/// - [`reset_rules`][AclContext::reset_rules] (`&mut self`) +/// - [`build`][AclContext::build] (consumes `self`, transitions to [`Built`]) +/// +/// Carries the [`AclBuildConfig`] that the context was created with so +/// that [`add_rules`][AclContext::add_rules] can validate each +/// [`Rule`]'s field values against the layout (catching e.g. an +/// out-of-range prefix length before it reaches DPDK's C shift in +/// `RTE_ACL_MASKLEN_TO_BITMASK`) and [`build`][AclContext::build] can +/// dispatch with no extra arguments. +#[derive(Debug, Clone)] +pub struct Configuring { + config: AclBuildConfig, +} + +/// Typestate: the context has been compiled and is ready for packet classification. +/// +/// Methods available in this state: +/// - [`classify`][AclContext::classify] (`&self`, thread-safe) +/// - [`classify_with_algorithm`][AclContext::classify_with_algorithm] (`&self`, thread-safe) +/// - [`set_default_algorithm`][AclContext::set_default_algorithm] (`&mut self`) +/// - [`reset`][AclContext::reset] (consumes `self`, transitions back to [`Configuring`]) +/// +/// Carries the [`AclBuildConfig`] that produced this build so that +/// downstream code can query the field layout and category count without +/// recomputing or re-passing it. Read via +/// [`build_config`][AclContext::build_config]. 
+#[derive(Debug, Clone)] +pub struct Built { + config: AclBuildConfig, +} + +/// Sealed marker trait for valid [`AclContext`] typestates. +/// +/// Implemented for [`Configuring`] and [`Built`]. +/// +/// `Send` is a supertrait because [`AclContext`] has a blanket `unsafe +/// impl Send`; the supertrait guarantees the state's own +/// auto-trait obligations are respected (e.g. an internal typestate that +/// held an `Rc<_>` could not implement `AclState` at all, which is the +/// desired outcome). +/// +/// `Sync` is deliberately **not** a supertrait. Per-state `unsafe impl +/// Sync` blocks are the single audit gate: adding a new typestate +/// requires writing a fresh `unsafe impl Sync for AclContext` +/// (or omitting it and getting a non-`Sync` context). A `Sync` supertrait +/// would mean every state mechanically gains `Sync` just by satisfying +/// the trait bound, hiding the per-state audit. +pub trait AclState: sealed::Sealed + Send {} + +mod sealed { + /// Sealed-trait support for [`super::AclState`]. External crates cannot + /// implement this trait, so they cannot add new typestates that would + /// inherit [`Send`]/[`Sync`]. + pub trait Sealed {} + impl Sealed for super::Configuring {} + impl Sealed for super::Built {} +} + +impl AclState for Configuring {} +impl AclState for Built {} + +// --------------------------------------------------------------------------- +// Build failure +// --------------------------------------------------------------------------- + +/// Returned when [`AclContext::build`] fails. +/// +/// Because `build` consumes the [`Configuring`] context, this error wraps +/// **both** the error description and the original context so the caller can +/// recover, inspect, or drop it. The returned context is still in +/// [`Configuring`] state and **retains any rules previously added via** +/// [`add_rules`][AclContext::add_rules] -- `build` does not call +/// `rte_acl_reset_rules` on failure. 
Callers who want a clean slate must +/// invoke [`reset_rules`][AclContext::reset_rules] on the returned context. +/// +/// # Example +/// +/// ```ignore +/// match ctx.build() { +/// Ok(built) => { /* use built context */ } +/// Err(failure) => { +/// eprintln!("build failed: {}", failure.error); +/// // The original context is still usable; previously-added rules are +/// // still present. Clear them if you want to retry from scratch: +/// let mut ctx = failure.context; +/// ctx.reset_rules(); +/// } +/// } +/// ``` +#[derive(thiserror::Error)] +#[error("ACL build failed for context '{}'", self.context.name())] +pub struct AclBuildFailure { + /// The build error. + #[source] + pub error: AclBuildError, + /// The original context, returned in [`Configuring`] state so it can be reused or dropped. + pub context: AclContext>, +} + +// Hand-rolled Debug because `AclContext` does not derive `Debug` (and +// embedding the full context state in error logs would be noise). +impl fmt::Debug for AclBuildFailure { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("AclBuildFailure") + .field("error", &self.error) + .field("context_name", &self.context.name()) + .finish() + } +} + +// --------------------------------------------------------------------------- +// AclContext +// --------------------------------------------------------------------------- + +/// A DPDK ACL context parameterised by field count `N` and lifecycle state `State`. +/// +/// See the [module documentation][self] for an overview of the typestate lifecycle. +/// +/// # Type parameters +/// +/// - `N`: the number of fields per rule. Must match across [`Rule`][super::rule::Rule], +/// [`AclBuildConfig`][super::config::AclBuildConfig], and this context. +/// - `State`: one of [`Configuring`] or [`Built`]. Defaults to [`Configuring`] for newly created +/// contexts. +pub struct AclContext> { + /// Raw DPDK context handle. Non-null invariant maintained at all times. 
+ ctx: NonNull, + /// The validated parameters that were used to create this context. + /// `AclCreateParams` ties the field-count to the context's `N` so a + /// mismatch is a compile-time error rather than UB at `rte_acl_add_rules` + /// time. + params: AclCreateParams, + /// Per-state data: both [`Configuring`] and [`Built`] carry the + /// [`AclBuildConfig`] (the [`Built`] copy is the one used by + /// `rte_acl_build`; the [`Configuring`] copy is used to validate rules + /// at `add_rules` time and is what `build` will pass to DPDK). + state: State, +} + +// The DPDK ACL context handle is a heap allocation -- it is not inherently tied to any particular +// thread, so `Send` is correct for any state that itself is `Send`. The +// blanket `Send` impl across `State: AclState` is fine because the trait's +// `Send` supertrait already guarantees the per-state portion is `Send`. +unsafe impl Send for AclContext {} + +// `Sync` is **not** blanket. Each typestate must explicitly opt in with +// its own `unsafe impl` so that adding a new typestate forces the author +// to write a fresh reentrancy audit. A blanket `impl +// Sync` would let a new state silently inherit Sync just by implementing +// the (sealed) `AclState` supertrait -- which would obscure the audit +// requirement. +// +// The load-bearing claim for `Sync` is that **every method on AclContext +// reachable through a shared `&self` is reentrant** -- i.e. two threads +// each holding `&self` cannot race against each other through any safe +// API. `Sync` already follows tautologically from `&mut self` discipline +// on the mutation methods; the non-trivial claim is what the `&self` +// methods do. +// +// `&self` methods reachable in any state ("all-states" impl on +// `AclContext`) and their reentrancy story: +// - `name()`, `params()`, `as_raw_ptr()` -- read-only access to immutable +// fields stored in the wrapper. Trivially reentrant. 
+// +// `dump()` is explicitly **not** in the `&self` set: it takes `&mut self`, +// which sidesteps any reentrancy claim against `rte_acl_dump`'s +// implementation details. Even though the current DPDK source only reads +// from the context inside `rte_acl_dump`, the `&mut self` borrow makes +// the argument robust against any future DPDK change that adds caching +// or other mutation inside the dump path. See the `dump` doc for the +// rationale. Listing `dump` here would be a documentation lie that +// could mislead a future reviewer into believing the `&self` Sync claim +// covered it. +// +// Cross-context registry mutation (Drop and `dump_all_contexts`) is +// protected by [`ACL_CREATE_LOCK`] at the Rust seam, so it does not +// participate in the per-context `&self` reentrancy story. + +// Sync impl for the [`Configuring`] state. +// +// `&self` methods reachable here are exactly the all-states ones above +// (`name`, `params`, `as_raw_ptr`). No `Configuring`-specific `&self` +// method exists; all rule mutation, `dump`, and the `build` transition +// take `&mut self` / consume `self`, which `Sync` does not concern. +unsafe impl Sync for AclContext> {} + +// Sync impl for the [`Built`] state. +// +// In addition to the all-states `&self` methods, `Built` exposes +// `classify` / `classify_with_algorithm` (DPDK documents these as +// thread-safe), `build_config`/`num_categories`/`field_defs` +// (read-only accessors into the stored config). All reentrant. +unsafe impl Sync for AclContext> {} + +// --------------------------------------------------------------------------- +// Methods available in ALL states +// --------------------------------------------------------------------------- + +impl AclContext { + /// Get the context name (as passed to [`AclCreateParams::new`]). + #[must_use] + #[inline] + pub fn name(&self) -> &str { + self.params.name() + } + + /// Get the creation parameters. 
+ #[must_use] + #[inline] + pub fn params(&self) -> &AclCreateParams { + &self.params + } + + /// Get the raw DPDK context pointer for read-only FFI. + /// + /// Returning a raw pointer is itself a safe operation; *using* the pointer + /// in any FFI call is what is unsafe, and that obligation already lives on + /// those FFI signatures. Mirrors the safety story of + /// [`Box::as_ptr`][core::ptr] and similar std accessors. + /// + /// For DPDK calls that take `*mut rte_acl_ctx` (e.g. `rte_acl_add_rules`, + /// `rte_acl_reset`, `rte_acl_set_ctx_classify`), use + /// [`as_raw_mut_ptr`][AclContext::as_raw_mut_ptr] instead so the + /// `&mut self` requirement carries the typestate's mutability discipline + /// into raw FFI code. + /// + /// # Lifetime + /// + /// The returned pointer is valid only while `self` is alive. Raw pointers + /// in Rust do **not** carry lifetimes, so the borrow checker will not catch + /// use-after-free of this pointer past a [`Drop`] of the context. Treat + /// the result as borrowed from `&self`: pass it straight to the FFI call + /// and do not hold it across moves or drops of the context. + #[must_use] + #[inline] + pub fn as_raw_ptr(&self) -> *const dpdk_sys::rte_acl_ctx { + self.ctx.as_ptr() + } + + /// Get the raw DPDK context pointer for mutating FFI. + /// + /// Taking `&mut self` mirrors the typestate's mutability discipline: a + /// caller cannot obtain a `*mut rte_acl_ctx` from a shared borrow of the + /// context, preventing data races between concurrent + /// `rte_acl_classify` (which takes `&self`) and any mutating FFI call + /// the caller might make through this pointer. + /// + /// See [`as_raw_ptr`][AclContext::as_raw_ptr] for the lifetime caveat + /// (raw pointers do not carry lifetimes in Rust; treat this one as + /// borrowed from `&mut self`). 
+ #[must_use] + #[inline] + pub fn as_raw_mut_ptr(&mut self) -> *mut dpdk_sys::rte_acl_ctx { + self.ctx.as_ptr() + } + + /// Dump the context's internal state to stdout via + /// [`rte_acl_dump`][dpdk_sys::rte_acl_dump]. + /// + /// This is a debugging aid. Output goes to stdout and is not captured + /// by the tracing subsystem. Under `cargo nextest`, stdout is captured + /// per test and only surfaced on failure or with `--no-capture`; under + /// `cargo test`, stdout is captured by default unless `--nocapture` is + /// passed. Either way, the output will not appear in the tracing + /// stream -- redirect or run the harness with capture disabled if you + /// need to read it interactively. + /// + /// # `&mut self` + /// + /// Takes `&mut self` rather than `&self` even though + /// [`rte_acl_dump`][dpdk_sys::rte_acl_dump] is read-only on the + /// current DPDK source. The exclusive borrow side-steps a + /// pin-to-DPDK-version reentrancy audit: any future change to DPDK + /// that adds caching inside `rte_acl_dump` would silently invalidate + /// a `&self` claim, but cannot affect `&mut self` (no other thread + /// has access to the context for the duration of the call). + #[cold] + pub fn dump(&mut self) { + // SAFETY: rte_acl_dump operates on the single context pointed + // at by `self.ctx` and does not touch the global registry, so + // no ACL_CREATE_LOCK acquisition is required. The `&mut self` + // borrow guarantees we have exclusive access to this context, + // covering any future DPDK change that adds mutation inside + // `rte_acl_dump`. + unsafe { dpdk_sys::rte_acl_dump(self.ctx.as_ptr()) } + } + + /// Decompose the context into its raw parts **without** running the destructor. + /// + /// Used internally to implement typestate transitions: the raw pointer, + /// params, and per-state data are moved out, and [`ManuallyDrop`] prevents + /// the old value's [`Drop`] from freeing the DPDK handle. 
+ fn into_parts(self) -> (NonNull, AclCreateParams, State) { + let this = ManuallyDrop::new(self); + let ctx = this.ctx; + // SAFETY: `this` is wrapped in ManuallyDrop, so its Drop will not run + // and the fields will not be double-freed when this function returns. + // Moving `params` out via ptr::read yields exactly one owner of the + // AclCreateParams. + let params = unsafe { core::ptr::read(&this.params) }; + // SAFETY: same reasoning as the params move above -- `this` is + // ManuallyDrop, so reading `state` produces a single owner. + let state = unsafe { core::ptr::read(&this.state) }; + (ctx, params, state) + } +} + +impl fmt::Debug for AclContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("AclContext") + .field("name", &self.name()) + .field("num_fields", &N) + .field("ptr", &self.ctx) + .finish() + } +} + +impl fmt::Display for AclContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "AclContext<{N}>({:?})", self.name()) + } +} + +// --------------------------------------------------------------------------- +// Configuring state +// --------------------------------------------------------------------------- + +impl AclContext> { + /// Create a new ACL context in the [`Configuring`] state. + /// + /// This is a safe wrapper around [`rte_acl_create`][dpdk_sys::rte_acl_create]. + /// + /// # Arguments + /// + /// * `params` -- validated creation parameters (see [`AclCreateParams::new`]). + /// * `config` -- validated build parameters (see + /// [`AclBuildConfig::new`]). The context retains the config for the + /// lifetime of the [`Configuring`] state and uses it to validate + /// [`Rule`] values at [`add_rules`][AclContext::add_rules] time and + /// to dispatch [`build`][AclContext::build] without re-supplying it. + /// + /// # Errors + /// + /// Returns [`AclCreateError`] if DPDK fails to allocate the context. 
This + /// includes the case where the DPDK EAL has not been initialized: + /// `rte_acl_create` returns NULL with `rte_errno` set, which surfaces as + /// [`AclCreateError::InvalidParams`] or [`AclCreateError::Unknown`]. The + /// failure is graceful -- this is a regular error path, not undefined + /// behavior. + #[cold] + #[tracing::instrument(level = "debug", skip(params, config), fields(name = params.name()))] + pub fn new( + params: AclCreateParams, + config: AclBuildConfig, + ) -> Result { + // Serialize the find_existing + create sequence with a process-wide + // mutex (see [`ACL_CREATE_LOCK`]). Without this, two threads can + // both observe find_existing -> NULL, both call rte_acl_create, and + // both receive the same DPDK pointer (since rte_acl_create returns + // the existing context for a duplicate name), producing two + // AclContext wrappers that race to free the same handle on drop. + // + // Lock acquisition uses the concurrency facade, which treats poison + // as a fatal invariant violation and panics rather than handing + // back a `LockResult`. That matches the workspace policy: a + // prior holder panicking while the registry was being mutated + // leaves DPDK's TAILQ in an unknown state, and continuing + // silently could lead to use-after-free. Aborting via the + // panic is the only safe answer. + let _create_guard = acl_create_lock().lock(); + + // Pre-flight: DPDK's `rte_acl_create` silently returns the existing + // context for a duplicate name. Refuse if one is already registered. + // + // SAFETY: name_cstr returns a valid, NUL-terminated C string borrowed + // from `params`; `rte_acl_find_existing` only reads through that + // pointer and does not retain it. 
+ let existing = unsafe { dpdk_sys::rte_acl_find_existing(params.name_cstr().as_ptr()) }; + if !existing.is_null() { + error!( + "rte_acl_find_existing found context '{}' already registered", + params.name(), + ); + return Err(AclCreateError::AlreadyExists { + name: params.name().to_owned(), + }); + } + + let raw_params = params.to_raw(); + + // SAFETY: raw_params borrows from `params` (which is on the stack and + // lives through the call), so the contained `name` pointer is valid for + // the duration of `rte_acl_create`. The `RawParams<'_>` lifetime + // statically prevents misuse. + let ctx_ptr = unsafe { dpdk_sys::rte_acl_create(raw_params.as_ptr()) }; + + let ctx = match NonNull::new(ctx_ptr) { + Some(ptr) => ptr, + None => { + let rte_errno = unsafe { dpdk_sys::rte_errno_get() }; + error!( + "rte_acl_create failed for '{}': rte_errno = {rte_errno}", + params.name(), + ); + return Err(match rte_errno { + errno::EINVAL => AclCreateError::InvalidParams, + errno::ENOMEM => AclCreateError::OutOfMemory, + other => AclCreateError::Unknown(Errno(other)), + }); + } + }; + + debug!( + "Created ACL context '{}' at {:p} (rule_size={}, max_rules={})", + params.name(), + ctx_ptr, + params.rule_size(), + params.max_rule_num(), + ); + + Ok(Self { + ctx, + params, + state: Configuring { config }, + }) + } + + /// Borrow the [`AclBuildConfig`] this context was created with. + /// + /// Symmetric with [`build_config`][AclContext::build_config] on + /// [`AclContext>`]. + #[must_use] + #[inline] + pub fn build_config(&self) -> &AclBuildConfig { + &self.state.config + } + + /// Add rules to the context. + /// + /// This is a safe wrapper around [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules]. + /// + /// Takes `&mut self` because DPDK documents this operation as **not thread-safe**. + /// + /// # Arguments + /// + /// * `rules` -- a slice of [`Rule`] to add. Each rule must have its fields in the same + /// order as the [`FieldDef`]s that will be used at build time. 
+ /// All field values must be in **host byte order**. + /// + /// Each rule is validated against this context's [`AclBuildConfig`] + /// (the one passed to [`AclContext::new`]) before being handed to + /// `rte_acl_add_rules`. In particular, a + /// [`FieldType::Mask`][super::field::FieldType::Mask] field whose + /// `mask_range` (interpreted as a prefix length) exceeds the field's + /// bit width is rejected here -- if it were forwarded to DPDK, the + /// `RTE_ACL_MASKLEN_TO_BITMASK` macro would perform a C shift by + /// `>= 8 * size`, which is undefined behaviour. + /// + /// # Errors + /// + /// Returns [`AclAddRulesError`] when a rule fails wrapper-side validation + /// ([`AclAddRulesError::InvalidRule`], which carries the offending + /// rule's index in the slice) or when DPDK itself rejects the rules + /// (e.g. the context is full or the rules are invalid). DPDK does + /// **not** report which rule it rejected; the wrapper-side check + /// catches the soundness-critical cases up-front, and for other + /// rejections you may need to bisect by submitting smaller + /// sub-slices. + #[cold] + #[tracing::instrument(level = "debug", skip(self, rules), fields(name = self.name(), count = rules.len()))] + pub fn add_rules(&mut self, rules: &[Rule]) -> Result<(), AclAddRulesError> { + if rules.is_empty() { + debug!("add_rules called with empty slice -- no-op"); + return Ok(()); + } + + // Wrapper-side validation against this context's AclBuildConfig. + // Catches soundness-critical mismatches (e.g. an out-of-range + // prefix length for a Mask field) before they reach DPDK's C code. + for (rule_index, rule) in rules.iter().enumerate() { + rule.validate(&self.state.config) + .map_err(|source| AclAddRulesError::InvalidRule { rule_index, source })?; + } + + // The length must fit in a u32 for the DPDK API. 
+ let num: u32 = rules.len().try_into().map_err(|_| { + error!("Rule count {} exceeds u32::MAX", rules.len()); + AclAddRulesError::TooManyRules { len: rules.len() } + })?; + + // SAFETY: + // - `Rule` is #[repr(C)] with identical layout to `RTE_ACL_RULE_DEF(_, N)`. + // The `rte_acl_rule` type is the "base" struct with a flexible array member; the + // `rule_size` parameter passed at context creation tells DPDK the actual stride. + // - The pointer is valid for `num` consecutive `Rule` elements. + let ret = unsafe { + dpdk_sys::rte_acl_add_rules( + self.ctx.as_ptr(), + rules.as_ptr() as *const dpdk_sys::rte_acl_rule, + num, + ) + }; + + if ret != 0 { + error!( + "rte_acl_add_rules failed for '{}': ret = {ret}", + self.name(), + ); + return Err(match ret { + errno::NEG_ENOMEM => AclAddRulesError::OutOfMemory, + errno::NEG_EINVAL => AclAddRulesError::InvalidParams, + other => AclAddRulesError::Unknown(Errno(other)), + }); + } + + debug!("Added {num} rules to ACL context '{}'", self.name(),); + Ok(()) + } + + /// Delete all rules from the context without destroying compiled runtime structures. + /// + /// Safe wrapper around [`rte_acl_reset_rules`][dpdk_sys::rte_acl_reset_rules]. + /// + /// Takes `&mut self` because DPDK documents this operation as **not thread-safe**. 
+ /// + /// # `reset_rules` vs [`reset`][AclContext::reset] + /// + /// The two reset entry points are distinguished by the state they + /// operate on: + /// + /// | method | available in | takes | clears rules | clears compiled structures | state after | + /// |--------|--------------|-------|--------------|----------------------------|-------------| + /// | `reset_rules` | [`Configuring`] | `&mut self` | yes | no (no compiled structures exist yet) | [`Configuring`] (unchanged) | + /// | [`reset`][AclContext::reset] | [`Built`] | `self` (consumes) | yes | yes (calls `rte_acl_reset`) | [`Configuring`] | + /// + /// Both keep the [`AclBuildConfig`] that was originally supplied to + /// [`AclContext::new`]; the next [`build`][AclContext::build] takes no + /// config argument. To switch to a different field layout, drop the + /// context and create a new one with the new config. + /// + /// The shape difference (`&mut self` vs consuming) is forced by the + /// typestate transition: `reset` changes the type, so it must consume + /// the value; `reset_rules` keeps the same type and so can mutate in + /// place. + #[cold] + #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))] + pub fn reset_rules(&mut self) { + // SAFETY: rte_acl_reset_rules mutates only the context pointed + // at by `self.ctx`; `&mut self` guarantees exclusive access for + // the duration of the call. + unsafe { dpdk_sys::rte_acl_reset_rules(self.ctx.as_ptr()) }; + debug!("Reset rules for ACL context '{}'", self.name()); + } + + /// Compile the rules into optimized runtime lookup structures. + /// + /// Safe wrapper around [`rte_acl_build`][dpdk_sys::rte_acl_build]. The + /// build config supplied to [`AclContext::new`] is forwarded to DPDK + /// here; this method takes no config argument. + /// + /// On success, the context transitions from [`Configuring`] to [`Built`] and is ready for + /// packet classification. 
+ /// + /// On failure, the original context is returned inside [`AclBuildFailure`] so that the caller + /// can recover, inspect, or drop it. The rules remain intact; the caller may adjust rules + /// and try again. + /// + /// # Errors + /// + /// Returns [`AclBuildFailure`] wrapping an [`AclBuildError`] on failure. + #[cold] + #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))] + pub fn build(self) -> Result>, AclBuildFailure> { + let raw_cfg = self.state.config.to_raw(); + + // SAFETY: `raw_cfg` is a stack-local copy with no dangling pointers and lives through + // the `rte_acl_build` call. + let ret = unsafe { dpdk_sys::rte_acl_build(self.ctx.as_ptr(), &raw_cfg) }; + + if ret != 0 { + error!("rte_acl_build failed for '{}': ret = {ret}", self.name(),); + let error = match ret { + errno::NEG_ENOMEM => AclBuildError::OutOfMemory, + errno::NEG_EINVAL => AclBuildError::InvalidConfig, + errno::NEG_ERANGE => AclBuildError::ExceededMaxSize, + other => AclBuildError::Unknown(Errno(other)), + }; + return Err(AclBuildFailure { + error, + context: self, + }); + } + + debug!("Built ACL context '{}'", self.name()); + + // Transition: Configuring -> Built. The config moves from + // Configuring into Built without a clone -- both states hold the + // same logical artifact. + let (ctx, params, old_state) = self.into_parts(); + Ok(AclContext { + ctx, + params, + state: Built { + config: old_state.config, + }, + }) + } +} + +// --------------------------------------------------------------------------- +// Built state +// --------------------------------------------------------------------------- + +impl AclContext> { + /// Borrow the [`AclBuildConfig`] used to compile this context. + /// + /// Useful when classify-time code needs to know the field layout (offsets, + /// sizes) or the number of categories without threading the config through + /// the call chain. 
+ #[must_use] + #[inline] + pub fn build_config(&self) -> &AclBuildConfig { + &self.state.config + } + + /// Get the number of categories used at build time. + /// + /// Shorthand for `self.build_config().num_categories()`. + #[must_use] + #[inline] + pub fn num_categories(&self) -> u32 { + self.state.config.num_categories() + } + + /// Borrow the field definitions used at build time. + /// + /// Shorthand for `self.build_config().field_defs()`. + #[must_use] + #[inline] + pub fn field_defs(&self) -> &[FieldDef; N] { + self.state.config.field_defs() + } + + /// Classify input data buffers against the compiled rules. + /// + /// This is the **hot-path** function and the primary reason the ACL context exists. + /// It is a thin wrapper around [`rte_acl_classify`][dpdk_sys::rte_acl_classify]; + /// the function is `unsafe` because the per-pointer buffer-size precondition + /// cannot be expressed in the type system (see the `# Safety` section below). + /// + /// Takes `&self` because DPDK documents classification as **thread-safe**. An + /// `Arc>` can be shared across threads for concurrent classification. + /// + /// # Arguments + /// + /// * `data` -- array of pointers to input data buffers. Each pointer should reference the + /// first byte of the region described by the [`FieldDef`] offsets. + /// All fields in the input buffers must be in **network byte order** (MSB). + /// * `results` -- output array to receive match results. Must have at least + /// `data.len() * categories` elements. Each result is either `0` (no match) or the + /// `userdata` value of the highest-priority matching rule for that (buffer, category) pair. + /// * `categories` -- number of match categories. Must be between 1 and + /// [`MAX_CATEGORIES`][super::config::MAX_CATEGORIES] (inclusive), and either 1 or a multiple + /// of [`RESULTS_MULTIPLIER`][super::config::RESULTS_MULTIPLIER]. 
+ /// + /// # Errors + /// + /// Returns [`AclClassifyError::InvalidArgs`] if: + /// - The `results` slice is too small for `data.len() * categories` entries. + /// - `data.len()` exceeds `u32::MAX`. + /// - `categories` is zero, exceeds [`MAX_CATEGORIES`][super::config::MAX_CATEGORIES], + /// is not `1` or a multiple of [`RESULTS_MULTIPLIER`][super::config::RESULTS_MULTIPLIER], + /// or exceeds the [`num_categories`][super::config::AclBuildConfig::num_categories] + /// the context was built with. + /// + /// Returns the appropriate error variant if DPDK itself rejects the arguments. + /// + /// # Safety + /// + /// Every pointer in `data` must be valid for reads of at least + /// [`AclBuildConfig::min_input_size`][super::config::AclBuildConfig::min_input_size] + /// bytes, where the build config is the one returned by + /// [`build_config`][AclContext::build_config]. DPDK reads from those + /// buffers without bounds checks and a dangling, null, or too-small + /// pointer is undefined behavior. + /// + /// The bound is **wider** than `max(field.offset + field.size)`: DPDK's + /// classify loop performs 4-byte aligned loads where each load's + /// starting offset is the **lowest `FieldDef.offset` within an + /// `input_index` group** (this is what DPDK's `data_index` is built + /// from at `rte_acl_build` time). Concretely, + /// [`min_input_size`][super::config::AclBuildConfig::min_input_size] + /// returns `max(group_offset + 4)` across all `input_index` groups in + /// the field-def array, which is the upper bound on the byte offset + /// DPDK may read from. The grouping validation in + /// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] + /// guarantees this is at least `max(field.offset + field.size)`, so + /// callers do not need to also account for the per-field extent + /// separately. + /// + /// The data array itself is read-only. 
bindgen generates `data: *mut *const + /// u8` (the loose C signature is `const uint8_t **`), but DPDK only reads + /// the array: `acl_set_flow` in `lib/acl/acl_run.h` stores the pointer + /// once, and the only access site dereferences `flows->data[i]` for read. + /// The `.cast_mut()` below is a type accommodation for the bindgen signature + /// and does not license writes through it. + /// + /// A future safe wrapper could enforce this statically via `&[&[u8; STRIDE]]` + /// where `STRIDE` is derived from the field layout; deferred until a + /// concrete consumer demonstrates the shape it wants. + #[inline] + pub unsafe fn classify( + &self, + data: &[*const u8], + results: &mut [u32], + categories: u32, + ) -> Result<(), AclClassifyError> { + let num = self.validate_classify_args(data, results, categories)?; + + // SAFETY: + // - data and results slice lengths have been validated. + // - The pointer validity precondition on the individual buffers is + // forwarded to our caller via the `unsafe fn` signature. + // - The `.cast_mut()` is sound because DPDK only reads the data + // array (see the # Safety section above for the source citation). + let ret = unsafe { + dpdk_sys::rte_acl_classify( + self.ctx.as_ptr(), + data.as_ptr().cast_mut(), + results.as_mut_ptr(), + num, + categories, + ) + }; + + if ret != 0 { + trace!( + "rte_acl_classify returned {ret} for context '{}'", + self.name(), + ); + return Err(match ret { + errno::NEG_EINVAL => AclClassifyError::InvalidArgs, + other => AclClassifyError::Unknown(Errno(other)), + }); + } + + Ok(()) + } + + /// Classify input data buffers using a specific SIMD algorithm. + /// + /// Identical to [`classify`][AclContext::classify] except that the caller explicitly selects + /// the classification algorithm instead of using the context's default.
+ /// + /// Thin wrapper around + /// [`rte_acl_classify_alg`][fn@dpdk_sys::rte_acl_classify_alg], except + /// when `algorithm == ClassifyAlgorithm::Default`: see the + /// "[`Default`][ClassifyAlgorithm::Default] is special" note below. + /// + /// # `Default` is special + /// + /// `rte_acl_classify_alg(ctx, ..., RTE_ACL_CLASSIFY_DEFAULT)` dispatches to + /// table slot 0 in DPDK's classify dispatch table, which is the + /// **scalar** implementation -- not "DPDK's best available". Only + /// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify] + /// expands `Default` to the best available variant on the current CPU. + /// To honour the "use the context's default algorithm" intent without + /// silently forcing scalar, this wrapper dispatches through + /// [`rte_acl_classify`] (which uses the context's currently-set + /// algorithm) when `algorithm == ClassifyAlgorithm::Default`. Any + /// other variant goes directly to `rte_acl_classify_alg`. + /// + /// [`rte_acl_classify`]: dpdk_sys::rte_acl_classify + /// + /// # Arguments + /// + /// See [`classify`][AclContext::classify] for `data`, `results`, and `categories`. + /// + /// * `algorithm` -- the SIMD implementation to use for this call. The caller + /// is responsible for ensuring the selected algorithm is supported on the + /// current CPU; see the `# Safety` section below. + /// + /// # Errors + /// + /// Same as [`classify`][AclContext::classify], plus + /// [`AclClassifyError::NotSupported`] if the underlying + /// `rte_acl_classify_alg` returns `-ENOTSUP` (typically because the selected + /// slot is a stub in this DPDK build; see the `# Safety` section below). + /// + /// # Safety + /// + /// Same pointer-validity precondition as [`classify`][AclContext::classify], plus: + /// + /// `algorithm` must be implemented and runnable on the current CPU.
+ /// Unlike + /// [`set_default_algorithm`][AclContext::set_default_algorithm] (which + /// delegates to `rte_acl_set_ctx_classify`, which in turn validates against + /// the per-CPU capability table before installing the algorithm), + /// [`rte_acl_classify_alg`][fn@dpdk_sys::rte_acl_classify_alg] does + /// **not** pre-check feature support; it dispatches straight through + /// the classify function-pointer table. Selecting a real SIMD variant + /// that the host does not implement therefore executes unsupported + /// instructions (SIGILL or silent corruption) rather than returning an + /// error. + /// + /// `ClassifyAlgorithm::Scalar` is always safe. + /// `ClassifyAlgorithm::Default` is also safe and is routed through + /// `rte_acl_classify` (see the "`Default` is special" section above), so + /// it picks up whatever variant `set_default_algorithm` previously + /// vetted. Every other variant requires the caller to confirm CPU + /// support out-of-band (e.g. via `is_x86_feature_detected!` or + /// `std::arch::is_aarch64_feature_detected!`). + /// + /// Note that an unsupported-but-stubbed-out slot (DPDK installs stub + /// entries for variants whose SIMD codepath was compiled out of the + /// build) will return `-ENOTSUP` through the FFI, surfacing as + /// [`AclClassifyError::NotSupported`] -- the unsafe contract is about + /// the case where the slot is a real, non-stub SIMD entry whose + /// instructions the CPU cannot execute. + #[inline] + pub unsafe fn classify_with_algorithm( + &self, + data: &[*const u8], + results: &mut [u32], + categories: u32, + algorithm: ClassifyAlgorithm, + ) -> Result<(), AclClassifyError> { + // See doc comment: `Default` through `rte_acl_classify_alg` would + // pin table slot 0 (scalar) rather than "the context's default". + // Dispatch through `rte_acl_classify` instead so the call honours + // whatever the context was last configured with. (Argument + // validation runs once, inside the delegated `classify`.)
+ if matches!(algorithm, ClassifyAlgorithm::Default) { + // SAFETY: same as classify; caller upholds the pointer validity + // precondition. + return unsafe { self.classify(data, results, categories) }; + } + + let num = self.validate_classify_args(data, results, categories)?; + + // SAFETY: same as classify; additionally `algorithm` maps to a valid + // rte_acl_classify_alg constant by construction. + let ret = unsafe { + dpdk_sys::rte_acl_classify_alg( + self.ctx.as_ptr(), + data.as_ptr().cast_mut(), + results.as_mut_ptr(), + num, + categories, + algorithm.into(), + ) + }; + + if ret != 0 { + trace!( + "rte_acl_classify_alg({algorithm}) returned {ret} for context '{}'", + self.name(), + ); + return Err(match ret { + errno::NEG_EINVAL => AclClassifyError::InvalidArgs, + errno::NEG_ENOTSUP => AclClassifyError::NotSupported, + other => AclClassifyError::Unknown(Errno(other)), + }); + } + + Ok(()) + } + + /// Set the default classification algorithm for future calls to + /// [`classify`][AclContext::classify]. + /// + /// This is a safe wrapper around + /// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify]. + /// + /// Takes `&mut self` because DPDK takes a `*mut rte_acl_ctx`, indicating the context is + /// mutated. Requiring exclusive access prevents data races with concurrent + /// [`classify`][AclContext::classify] calls. + /// + /// # Interaction with [`Arc`][std::sync::Arc] + /// + /// The `&mut self` requirement means a context that has been wrapped in + /// [`Arc`][std::sync::Arc] (the typical pattern for sharing a + /// [`Built`] context across classification threads) is no longer + /// reachable for `set_default_algorithm`. Call this **before** wrapping + /// the context in an `Arc`, or use + /// [`classify_with_algorithm`][AclContext::classify_with_algorithm] to + /// override the algorithm on individual calls without mutating the + /// shared context. 
+ /// + /// # Errors + /// + /// Returns [`AclSetAlgorithmError`] if the algorithm is unsupported or the parameters are + /// invalid. + #[cold] + #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))] + pub fn set_default_algorithm( + &mut self, + algorithm: ClassifyAlgorithm, + ) -> Result<(), AclSetAlgorithmError> { + // SAFETY: `algorithm.into()` yields a valid rte_acl_classify_alg constant by + // construction. + let ret = + unsafe { dpdk_sys::rte_acl_set_ctx_classify(self.ctx.as_ptr(), algorithm.into()) }; + + if ret != 0 { + error!( + "rte_acl_set_ctx_classify({algorithm}) failed for '{}': ret = {ret}", + self.name(), + ); + return Err(match ret { + errno::NEG_EINVAL => AclSetAlgorithmError::InvalidParams, + errno::NEG_ENOTSUP => AclSetAlgorithmError::NotSupported, + other => AclSetAlgorithmError::Unknown(Errno(other)), + }); + } + + debug!( + "Set default classify algorithm to {algorithm} for ACL context '{}'", + self.name(), + ); + Ok(()) + } + + /// Reset the context, clearing **both** rules and compiled runtime + /// structures, and transition back to the [`Configuring`] state. + /// + /// Safe wrapper around [`rte_acl_reset`][dpdk_sys::rte_acl_reset]. The + /// [`AclBuildConfig`] is retained (it lives on the [`Configuring`] + /// state just as on [`Built`]), so the next + /// [`build`][AclContext::build] requires no fresh config argument. If + /// the caller wants to switch to a different field layout, they should + /// drop the context and construct a new one with the desired config. + /// + /// See [`reset_rules`][AclContext::reset_rules] for the matching method on + /// [`Configuring`] contexts and a comparison table. + /// + /// The returned context has no rules and no compiled structures, but + /// the same field layout as before; ready for new rules to be added + /// via [`add_rules`][AclContext::add_rules]. 
+ #[cold] + #[tracing::instrument(level = "debug", skip(self), fields(name = self.name()))] + pub fn reset(self) -> AclContext> { + // SAFETY: rte_acl_reset mutates only the context pointed at by + // `self.ctx`; consuming `self` by value guarantees no other + // reference to this context can be in use. + unsafe { dpdk_sys::rte_acl_reset(self.ctx.as_ptr()) }; + + debug!("Reset ACL context '{}'", self.name()); + + // Transition: Built -> Configuring. Carry the config forward; the + // post-reset context still describes the same field layout. + let (ctx, params, old_state) = self.into_parts(); + AclContext { + ctx, + params, + state: Configuring { + config: old_state.config, + }, + } + } + + /// Validate the arguments common to both classify methods. + /// + /// Returns the validated `num` value as `u32` on success. + /// + /// `categories` is checked against DPDK's documented bounds **before** we + /// hand it to FFI. DPDK uses `categories` to index into per-thread runtime + /// arrays sized to [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES], + /// so out-of-bound values can overflow C-side state and are not safe to + /// forward. + #[inline] + fn validate_classify_args( + &self, + data: &[*const u8], + results: &[u32], + categories: u32, + ) -> Result { + // `categories` must be in the closed range [1, MAX_CATEGORIES] and + // either 1 or a multiple of RESULTS_MULTIPLIER -- the same constraints + // applied by AclBuildConfig::new at build time. We re-check here + // because the categories value at classify time is independent of the + // build's num_categories and is otherwise unconstrained input. 
+ use super::config::{MAX_CATEGORIES, RESULTS_MULTIPLIER}; + if categories == 0 { + error!("classify categories must be at least 1"); + return Err(AclClassifyError::InvalidArgs); + } + if categories > MAX_CATEGORIES { + error!( + "classify categories {categories} exceeds RTE_ACL_MAX_CATEGORIES ({MAX_CATEGORIES})", + ); + return Err(AclClassifyError::InvalidArgs); + } + if categories != 1 && !categories.is_multiple_of(RESULTS_MULTIPLIER) { + error!( + "classify categories {categories} must be 1 or a multiple of \ + RTE_ACL_RESULTS_MULTIPLIER ({RESULTS_MULTIPLIER})", + ); + return Err(AclClassifyError::InvalidArgs); + } + // `categories` must not exceed the value supplied at build time. + // The trie's per-node result slots are sized to `num_categories`; + // passing `categories > num_categories` would make DPDK's classify + // loop read past those slots into adjacent trie memory. DPDK does + // not validate this itself, so we close the hole here. Passing + // `categories < num_categories` is permitted and just truncates + // the results (one valid use case is a multi-category build that + // a particular caller only wants the first category from). + let built_num_categories = self.state.config.num_categories(); + if categories > built_num_categories { + error!( + "classify categories {categories} exceeds build-time num_categories ({built_num_categories})", + ); + return Err(AclClassifyError::InvalidArgs); + } + + // The number of input buffers must fit in u32. + let num: u32 = data.len().try_into().map_err(|_| { + error!("Input buffer count {} exceeds u32::MAX", data.len()); + AclClassifyError::InvalidArgs + })?; + + // The results slice must be large enough for `num * categories` entries. 
+ let required = (num as usize) + .checked_mul(categories as usize) + .ok_or_else(|| { + error!("Overflow computing required results size: {num} * {categories}",); + AclClassifyError::InvalidArgs + })?; + + if results.len() < required { + error!( + "Results slice too small: have {}, need {} ({num} buffers * {categories} categories)", + results.len(), + required, + ); + return Err(AclClassifyError::InvalidArgs); + } + + Ok(num) + } +} + +// --------------------------------------------------------------------------- +// RAII: Drop +// --------------------------------------------------------------------------- + +// Drop takes [`ACL_CREATE_LOCK`] before calling `rte_acl_free` (see the +// comment on that static). ACL contexts are expected to be long-lived +// (created during setup, dropped at shutdown), so this serialisation has +// no practical cost. If a future caller drops `AclContext`s on a hot +// path, the contention with concurrent `AclContext::new` and +// `dump_all_contexts` calls becomes visible -- prefer to keep contexts +// alive for their useful lifetime instead. +// +// Reentrancy invariant: the lock is a non-reentrant `Mutex<()>`, so an +// `AclContext` must **not** be dropped on a thread that already holds +// [`ACL_CREATE_LOCK`] -- doing so would deadlock the current thread on +// its own previously-acquired guard. In practice this can only happen +// in pathological setups (e.g. a caller manually acquires the lock by +// poking module-private state); the wrapper itself never holds the +// lock across a region that could free an `AclContext`. +impl Drop for AclContext { + fn drop(&mut self) { + debug!("Freeing ACL context '{}'", self.name()); + // Serialize the rte_acl_free call against AclContext::new and + // dump_all_contexts via the same process-wide mutex (see + // [`ACL_CREATE_LOCK`]). 
DPDK's `rte_acl_free` removes the + // context's entry from the global TAILQ; without this lock, an + // interleaving with a concurrent `find_existing`-then-`create` in + // another thread could observe a half-removed entry. + // + // The facade panics on poison. Dropping while another holder + // panicked mid-operation means the DPDK registry may be in an + // unknown state; aborting via the panic is the only safe answer. + let _guard = acl_create_lock().lock(); + // SAFETY: rte_acl_free is safe to call on any valid context pointer; `Drop` runs at + // most once per `AclContext`, and the create-lock acquired above serialises against + // `rte_acl_create` / `dump_all_contexts`. + unsafe { dpdk_sys::rte_acl_free(self.ctx.as_ptr()) }; + } +} + +// --------------------------------------------------------------------------- +// Module-level utilities +// --------------------------------------------------------------------------- + +/// Dump information about **all** ACL contexts to stdout. +/// +/// This is a debugging aid that calls [`rte_acl_list_dump`][dpdk_sys::rte_acl_list_dump]. +/// Output goes directly to stdout and is not captured by the tracing subsystem. +/// +/// # Thread safety +/// +/// Holds the same process-wide ACL registry mutex used by +/// [`AclContext::new`] and [`AclContext`] drops, so the list-walking +/// inside `rte_acl_list_dump` does not race against concurrent registry +/// mutation elsewhere in the process. +#[cold] +pub fn dump_all_contexts() { + // See the locking rationale on Drop / AclContext::new. The dump walks + // DPDK's global TAILQ of contexts; concurrent registry mutation would + // expose a list in an inconsistent state to the walk. Facade panics + // on poison (workspace policy -- a prior holder panic implies the + // registry may be inconsistent). + let _guard = acl_create_lock().lock(); + // SAFETY: rte_acl_list_dump takes no arguments and simply iterates an internal list. 
+ unsafe { dpdk_sys::rte_acl_list_dump() } +} diff --git a/dpdk/src/acl/error.rs b/dpdk/src/acl/error.rs new file mode 100644 index 0000000000..87c5de1cf2 --- /dev/null +++ b/dpdk/src/acl/error.rs @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Error types for ACL operations. +//! +//! Each fallible ACL operation has a dedicated error type following the project's error handling +//! guidelines. Errors are strongly typed enums rather than strings or bare numeric codes. + +use errno::Errno; + +/// Ways in which an ACL context name can be invalid. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, thiserror::Error)] +pub enum InvalidAclName { + /// The name is not valid ASCII. + #[error("ACL context name must be valid ASCII")] + NotAscii, + /// The name is too long (exceeds [`RTE_ACL_NAMESIZE`][dpdk_sys::RTE_ACL_NAMESIZE]). + #[error("ACL context name is too long ({len} > {max} bytes)")] + TooLong { + /// The length of the name that was provided. + len: usize, + /// The maximum allowed length. + max: usize, + }, + /// The name is empty. + #[error("ACL context name must not be empty")] + Empty, + /// The name contains interior null bytes. + #[error("ACL context name must not contain null bytes")] + ContainsNullBytes, +} + +/// Errors that can occur when creating an ACL context via [`rte_acl_create`][dpdk_sys::rte_acl_create]. +#[derive(Debug, thiserror::Error)] +pub enum AclCreateError { + /// The context name failed validation. + #[error("Invalid ACL context name: {0}")] + InvalidName(#[from] InvalidAclName), + /// A context with this name already exists in DPDK's global registry. + /// + /// DPDK's [`rte_acl_create`][dpdk_sys::rte_acl_create] silently returns the + /// existing context for a duplicate name rather than failing. 
Returning + /// that pointer wrapped in a new [`AclContext`][super::context::AclContext] + /// would create two owning wrappers for the same DPDK handle, leading to + /// use-after-free when the first one is dropped. We refuse the call + /// instead. + /// + /// Detection is reliable against concurrent calls to + /// [`AclContext::new`][super::context::AclContext::new] within the same + /// process: a module-private mutex serializes the + /// `rte_acl_find_existing` + `rte_acl_create` pair. Concurrent calls to + /// `rte_acl_create` from outside this wrapper (e.g. another C/C++ + /// library linked into the same process) can still race. + /// + /// As a workspace-level invariant, **nothing else in this process is + /// permitted to call `rte_acl_create` / `rte_acl_free` directly**. If + /// a future DPDK PMD or third-party library is added that touches the + /// global ACL registry, the wrapper's lock must be either lifted into + /// a coordination primitive that the new caller honours, or replaced + /// by a different scheme. Touch + /// [`ACL_CREATE_LOCK`][super::context] when revisiting. + #[error("An ACL context named '{name}' already exists")] + AlreadyExists { + /// The name that collided. + name: String, + }, + /// DPDK returned `EINVAL` -- one or more parameters are invalid. + #[error("Invalid ACL creation parameters")] + InvalidParams, + /// DPDK returned `ENOMEM` -- insufficient memory to allocate the context. + #[error("Not enough memory to create ACL context")] + OutOfMemory, + /// DPDK set an `rte_errno` value that does not match any documented error for this call. + #[error("Unknown error creating ACL context: {0:?}")] + Unknown(Errno), +} + +/// Errors that can occur when adding rules via [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules]. +#[derive(Debug, thiserror::Error)] +pub enum AclAddRulesError { + /// The caller-supplied slice contains more than `u32::MAX` rules, which + /// cannot be represented in the DPDK FFI's `num` argument. 
Distinct + /// from [`InvalidParams`][AclAddRulesError::InvalidParams] (which is + /// DPDK's own validation failure), this is a pre-flight length check + /// in the Rust wrapper. + #[error("Rule slice length {len} exceeds u32::MAX")] + TooManyRules { + /// The offending slice length. + len: usize, + }, + /// A rule's [`AclField`] values are inconsistent with the + /// [`AclBuildConfig`] in effect. Caught in the Rust wrapper before the + /// call would reach `rte_acl_add_rules`; see [`InvalidRule`] for the + /// per-violation details. + /// + /// [`AclField`]: super::rule::AclField + /// [`AclBuildConfig`]: super::config::AclBuildConfig + #[error("rule {rule_index} is invalid for the configured field layout: {source}")] + InvalidRule { + /// Position of the offending rule within the caller's slice. + rule_index: usize, + /// The specific violation. + #[source] + source: InvalidRule, + }, + /// DPDK returned `ENOMEM` -- not enough space in the context for the new rules. + #[error("No space for additional rules in ACL context")] + OutOfMemory, + /// DPDK returned `EINVAL` -- one or more rule parameters are invalid. + #[error("Invalid rule parameters")] + InvalidParams, + /// DPDK returned an undocumented error code. + #[error("Unknown error adding rules: {0:?}")] + Unknown(Errno), +} + +/// Per-rule validation failure, reported as the cause of +/// [`AclAddRulesError::InvalidRule`]. +/// +/// Catching these in Rust (rather than relying on DPDK's later rejection at +/// build time) avoids reaching C code paths that would shift by an +/// out-of-range amount or otherwise invoke undefined behaviour on invalid +/// rule data. +#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] +pub enum InvalidRule { + /// A [`FieldType::Mask`][super::field::FieldType::Mask] field's + /// `mask_range` (interpreted as a prefix length) exceeds the field's + /// bit width. 
DPDK's `RTE_ACL_MASKLEN_TO_BITMASK` would perform a C + /// shift by an amount `>= 8 * size`, which is undefined behaviour. + #[error( + "Mask field at field_index {field_index}: prefix length \ + {prefix_length} exceeds the field's bit width ({max_bits})" + )] + PrefixLengthOutOfRange { + /// The `field_index` of the offending field. + field_index: u8, + /// The caller-supplied prefix length. + prefix_length: u64, + /// `8 * size_bytes`. + max_bits: u32, + }, + /// A [`FieldType::Range`][super::field::FieldType::Range] field has + /// `value > mask_range`. DPDK interprets `value` as the inclusive low + /// bound and `mask_range` as the inclusive high bound, so the range + /// would be empty. + #[error( + "Range field at field_index {field_index}: low bound {low} \ + exceeds high bound {high}" + )] + RangeReversed { + /// The `field_index` of the offending field. + field_index: u8, + /// The low bound (`value`). + low: u64, + /// The high bound (`mask_range`). + high: u64, + }, + /// The rule's `category_mask` has bits set at positions + /// `>= config.num_categories()`. DPDK silently masks out those bits + /// at build time, which would make the rule apply to fewer + /// categories than the caller intended. Surfacing this at + /// `add_rules` time avoids the silent-narrowing footgun. + #[error( + "category_mask {category_mask:#010x} has bits set beyond \ + num_categories ({num_categories}); offending bits: {extra_bits:#010x}" + )] + CategoryMaskExceedsNumCategories { + /// The rule's category mask. + category_mask: u32, + /// The build config's `num_categories`. + num_categories: u32, + /// `category_mask & !((1 << num_categories) - 1)`, the bits that + /// DPDK would mask off. + extra_bits: u32, + }, +} + +/// Errors that can occur when building the ACL context via [`rte_acl_build`][dpdk_sys::rte_acl_build]. 
+/// +/// Recovery: any of these variants is reported through +/// [`AclBuildFailure`][super::context::AclBuildFailure], which carries the +/// original [`AclContext`][super::context::AclContext] back to the caller in +/// the [`Configuring`][super::context::Configuring] state. The Rust typestate +/// is unchanged (the transition to `Built` happens only on success), and the +/// **DPDK-side rule list is left intact** -- previously-added rules remain +/// loaded. Callers who want a clean slate must call +/// [`reset_rules`][super::context::AclContext::reset_rules] on the returned +/// context before retrying. +#[derive(Debug, thiserror::Error)] +pub enum AclBuildError { + /// DPDK returned `ENOMEM` -- not enough memory to build the runtime structures. + #[error("Not enough memory to build ACL context")] + OutOfMemory, + /// DPDK returned `EINVAL` -- the build configuration is invalid. + #[error("Invalid ACL build configuration")] + InvalidConfig, + /// DPDK returned `ERANGE` -- the compiled runtime structures exceeded + /// [`AclBuildConfig::max_size`][super::config::AclBuildConfig::max_size]. + /// Raise the limit or simplify the rule set, then retry on the + /// recovered context (see [`AclBuildFailure`][super::context::AclBuildFailure]). + #[error("ACL runtime structures exceeded the configured max_size")] + ExceededMaxSize, + /// DPDK returned an undocumented error code from `rte_acl_build`. + #[error("ACL build failed: {0:?}")] + Unknown(Errno), +} + +/// Errors that can occur during classification via +/// [`rte_acl_classify`][dpdk_sys::rte_acl_classify]. +#[derive(Debug, thiserror::Error)] +pub enum AclClassifyError { + /// DPDK returned `EINVAL`, or the wrapper's pre-flight validation failed -- the classify arguments are invalid. + /// + /// Common causes: + /// - `categories` is zero, greater than [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES], + /// or neither `1` nor a multiple of [`RTE_ACL_RESULTS_MULTIPLIER`][dpdk_sys::RTE_ACL_RESULTS_MULTIPLIER].
+ /// - The `results` slice is too small for `num * categories` entries. + #[error("Invalid classify arguments")] + InvalidArgs, + /// DPDK returned `ENOTSUP` -- the requested classification algorithm + /// is not supported on this CPU. Only reachable through + /// [`classify_with_algorithm`][super::context::AclContext::classify_with_algorithm]; + /// the default-algorithm path uses the context's previously-set + /// algorithm, which has already been vetted by + /// [`set_default_algorithm`][super::context::AclContext::set_default_algorithm]. + #[error("Requested classification algorithm is not supported on this CPU")] + NotSupported, + /// DPDK returned an undocumented error code. + #[error("Unknown error during classification: {0:?}")] + Unknown(Errno), +} + +/// Errors that can occur when setting the classification algorithm via +/// [`rte_acl_set_ctx_classify`][dpdk_sys::rte_acl_set_ctx_classify]. +#[derive(Debug, thiserror::Error)] +pub enum AclSetAlgorithmError { + /// DPDK returned `EINVAL` -- the parameters are invalid. + #[error("Invalid algorithm or context")] + InvalidParams, + /// The requested algorithm is not supported on this CPU. + #[error("Requested classification algorithm is not supported on this platform")] + NotSupported, + /// DPDK returned an undocumented error code. + #[error("Unknown error setting classification algorithm: {0:?}")] + Unknown(Errno), +} diff --git a/dpdk/src/acl/field.rs b/dpdk/src/acl/field.rs new file mode 100644 index 0000000000..89d6847417 --- /dev/null +++ b/dpdk/src/acl/field.rs @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! ACL field definition types. +//! +//! These types provide safe, strongly-typed wrappers around DPDK's [`rte_acl_field_def`] and the +//! associated `RTE_ACL_FIELD_TYPE_*` constants. +//! +//! Using Rust enums for [`FieldType`] and [`FieldSize`] makes it impossible to construct an +//!
invalid field definition at the type level -- there is no representation for, say, a 3-byte +//! field or an undefined comparison type. +//! +//! [`rte_acl_field_def`]: dpdk_sys::rte_acl_field_def + +use core::fmt::{Display, Formatter}; + +/// The comparison semantics for an ACL field. +/// +/// Each field in an ACL rule is compared against input data using one of three +/// strategies. The choice of strategy also determines how the `mask_range` +/// value in [`AclField`][super::rule::AclField] is interpreted (see the +/// constructor docs on [`AclField`][super::rule::AclField] for the +/// type-vs-`mask_range` mapping). +/// +/// Maps to the `RTE_ACL_FIELD_TYPE_*` constants. +#[repr(u8)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum FieldType { + /// Prefix-length match. + /// + /// `mask_range` holds the **prefix length** -- the number of + /// most-significant bits to compare. DPDK derives the bitmask internally + /// from the prefix length and the field size. + /// + /// Examples (for a 4-byte field): + /// - `32` -- exact match on all 32 bits. + /// - `24` -- IPv4 `/24` (compare the top 24 bits only). + /// - `0` -- wildcard (matches anything). + /// + /// Corresponds to [`RTE_ACL_FIELD_TYPE_MASK`][dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_MASK]. + Mask = 0, + + /// Range match. + /// + /// The comparison is: `low <= input <= high`. `value` is the low bound + /// and `mask_range` is the high bound. Typically used for port ranges. + /// + /// Corresponds to [`RTE_ACL_FIELD_TYPE_RANGE`][dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_RANGE]. + Range = 1, + + /// Bitmask match. + /// + /// The comparison is: `(input & mask_range) == value`. `mask_range` + /// holds the bitmask applied to the input before comparison with + /// `value`. Typically used for flag-style fields (TCP flags, protocol + /// numbers with don't-care bits, etc.). + /// + /// Example: to match a TCP protocol number (`6`) exactly, use `value = 6` + /// and `mask_range = 0xFF`. 
+ /// + /// Corresponds to [`RTE_ACL_FIELD_TYPE_BITMASK`][dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_BITMASK]. + Bitmask = 2, +} + +/// Formats the variant as its bare name: `Mask`, `Range`, or `Bitmask`. +impl Display for FieldType { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + match self { + FieldType::Mask => write!(f, "Mask"), + FieldType::Range => write!(f, "Range"), + FieldType::Bitmask => write!(f, "Bitmask"), + } + } +} + +/// Valid byte widths for an ACL field. +/// +/// DPDK restricts ACL field sizes to 1, 2, or 4 bytes per +/// [`FieldDef`] within a single `input_index` group. The C library also +/// supports 8-byte logical fields by spanning two adjacent 4-byte groups, +/// but the wrapper does not model that split-load behaviour, so +/// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] rejects +/// any layout that would have required it. `FieldSize` therefore omits +/// `Eight` to keep "constructible width" and "build-valid width" in sync. +/// +/// Each discriminant equals the width in bytes, so `size as u8` is the value +/// written to the `size` member of `rte_acl_field_def` and the value printed +/// by the `Display` impl. +#[repr(u8)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum FieldSize { + /// 1 byte (e.g. IP protocol number). + One = 1, + /// 2 bytes (e.g. TCP/UDP port). + Two = 2, + /// 4 bytes (e.g. IPv4 address). + Four = 4, +} + +/// Formats the width as its byte count: `1`, `2`, or `4`. +impl Display for FieldSize { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", *self as u8) + } +} + +/// Definition of a single field within an ACL rule. +/// +/// This is the safe Rust equivalent of [`rte_acl_field_def`][dpdk_sys::rte_acl_field_def]. +/// A collection of field definitions describes the overall layout of rules and input data for an +/// ACL context. +/// +/// # Input grouping +/// +/// For performance reasons the inner loop of the DPDK ACL search function is unrolled to process +/// four input bytes at a time. Fields must therefore be grouped into sets of 4 consecutive bytes +/// via the [`input_index`][FieldDef::input_index] value. The first input byte is processed as +/// part of setup, so subsequent groups must be aligned to 4-byte boundaries.
+/// +/// See the [DPDK ACL documentation](https://doc.dpdk.org/guides/prog_guide/packet_classif_access_ctrl.html) +/// for full details on input grouping rules. +/// +/// # Why the fields are private +/// +/// Fields are private so that callers cannot construct a `FieldDef` whose +/// `field_index` would be out of range for the `N` used in the eventual +/// [`AclBuildConfig`][super::config::AclBuildConfig]. Construction goes +/// through [`FieldDef::new`]; the array-level invariants (`field_index < N`, +/// uniqueness, first-field-is-one-byte) are validated by +/// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] when the +/// definitions are assembled. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct FieldDef { + /// The comparison type for this field. + field_type: FieldType, + /// Width of the field in bytes. + size: FieldSize, + /// Zero-based index of this field within a rule (must be unique per rule layout and < N). + field_index: u8, + /// Input grouping index. + /// + /// Fields are processed in groups of 4 consecutive bytes. All fields that share the same + /// `input_index` must fit within 4 bytes starting at the offset of the first field in the + /// group. + input_index: u8, + /// Byte offset of this field within the input data buffer. + offset: u32, +} + +impl FieldDef { + /// Construct a field definition. + /// + /// The cross-field invariants (`field_index < N`, uniqueness within the + /// array, the first field being one byte wide) are validated by + /// [`AclBuildConfig::new`][super::config::AclBuildConfig::new] when the + /// definitions are assembled into an array. The DPDK 4-byte + /// `input_index` grouping rule is checked by DPDK itself at + /// `rte_acl_build` time. 
+ #[must_use] + pub const fn new( + field_type: FieldType, + size: FieldSize, + field_index: u8, + input_index: u8, + offset: u32, + ) -> Self { + Self { + field_type, + size, + field_index, + input_index, + offset, + } + } + + /// The comparison strategy for this field. + #[must_use] + pub const fn field_type(&self) -> FieldType { + self.field_type + } + + /// The field width in bytes. + #[must_use] + pub const fn size(&self) -> FieldSize { + self.size + } + + /// Zero-based index of this field within the rule layout. + #[must_use] + pub const fn field_index(&self) -> u8 { + self.field_index + } + + /// The input grouping index. + #[must_use] + pub const fn input_index(&self) -> u8 { + self.input_index + } + + /// Byte offset of this field within the input data buffer. + #[must_use] + pub const fn offset(&self) -> u32 { + self.offset + } +} + +impl Display for FieldDef { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!( + f, + "FieldDef {{ type: {}, size: {}, field_index: {}, input_index: {}, offset: {} }}", + self.field_type, self.size, self.field_index, self.input_index, self.offset, + ) + } +} + +impl From for dpdk_sys::rte_acl_field_def { + fn from(def: FieldDef) -> Self { + (&def).into() + } +} + +impl From<&FieldDef> for dpdk_sys::rte_acl_field_def { + fn from(def: &FieldDef) -> Self { + dpdk_sys::rte_acl_field_def { + type_: def.field_type as u8, + size: def.size as u8, + field_index: def.field_index, + input_index: def.input_index, + offset: def.offset, + } + } +} + +// Layout asserts for `rte_acl_field_def`. The `From<&FieldDef>` impl +// above produces an `rte_acl_field_def` value by struct-literal +// composition (not by transmute), so a size/align mismatch with the +// bindgen struct cannot cause UB on its own. 
These asserts are a +// canary: if DPDK ever changes the layout (added padding, reordered +// fields, widened a type), the `[FieldDef; N] -> [rte_acl_field_def; +// N]` conversion that `AclBuildConfig::to_raw` builds when populating +// `rte_acl_config::defs` would silently produce wrong results. +// Symmetric with the matching asserts on `RuleData` (rule.rs) and +// `AclField` (rule.rs). +const _: () = { + assert!( + core::mem::size_of::() == 8, + "rte_acl_field_def size changed; recheck FieldDef -> rte_acl_field_def conversion" + ); + assert!( + core::mem::align_of::() == 4, + "rte_acl_field_def alignment changed; recheck FieldDef -> rte_acl_field_def conversion" + ); +}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn field_type_discriminants_match_dpdk() { + assert_eq!( + FieldType::Mask as u8, + dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_MASK as u8 + ); + assert_eq!( + FieldType::Range as u8, + dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_RANGE as u8 + ); + assert_eq!( + FieldType::Bitmask as u8, + dpdk_sys::_bindgen_ty_3::RTE_ACL_FIELD_TYPE_BITMASK as u8 + ); + } + + #[test] + fn field_def_converts_to_raw() { + let def = FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 2, 12); + let raw: dpdk_sys::rte_acl_field_def = def.into(); + assert_eq!(raw.type_, 0); + assert_eq!(raw.size, 4); + assert_eq!(raw.field_index, 1); + assert_eq!(raw.input_index, 2); + assert_eq!(raw.offset, 12); + } + + #[test] + fn field_def_ref_converts_to_raw() { + let def = FieldDef::new(FieldType::Range, FieldSize::Two, 3, 4, 20); + let raw: dpdk_sys::rte_acl_field_def = (&def).into(); + assert_eq!(raw.type_, 1); + assert_eq!(raw.size, 2); + assert_eq!(raw.field_index, 3); + assert_eq!(raw.input_index, 4); + assert_eq!(raw.offset, 20); + } +} diff --git a/dpdk/src/acl/mod.rs b/dpdk/src/acl/mod.rs new file mode 100644 index 0000000000..d920fe8eff --- /dev/null +++ b/dpdk/src/acl/mod.rs @@ -0,0 +1,826 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network 
Fabric Authors + +//! Safe Rust abstraction over DPDK's ACL (Access Control List) library. +//! +//! This module provides a safe, idiomatic Rust interface to DPDK's packet classification engine. +//! The ACL library builds an optimised trie from a set of rules and uses SIMD-accelerated search +//! to classify input data buffers against those rules at high throughput. +//! +//! # Lifecycle +//! +//! The ACL context follows a **typestate** lifecycle enforced at compile time: +//! +//! ```mermaid +//! stateDiagram-v2 +//! [*] --> Configuring: new() +//! Configuring --> Configuring: add_rules(&mut) +//! Configuring --> Built: build() +//! Built --> Configuring: reset() +//! Built --> Built: classify(&) -> results +//! ``` +//! +//! - [`AclContext`][context::AclContext] in the [`Configuring`] state -- accepts rule mutations +//! via `&mut self`. +//! The Rust borrow checker enforces DPDK's documented constraint that rule addition and +//! compilation are **not thread-safe**. +//! +//! - [`AclContext`][context::AclContext] in the [`Built`] state -- supports packet classification +//! via `&self`. +//! Because classification is documented by DPDK as **thread-safe**, the `Sync` implementation +//! allows safe concurrent access from multiple threads (e.g. via `Arc`). +//! +//! # Type safety +//! +//! The const generic parameter `N` (number of fields per rule) is shared across +//! [`AclContext`], [`Rule`], and [`AclBuildConfig`]. A field-count mismatch between any of +//! these types is caught at compile time. +//! +//! # Byte order +//! +//! Rule field values must be in **host byte order** (the native endianness of the build target), +//! while input data buffers passed to +//! [`classify`][context::AclContext::classify] must be in **network byte order** (big-endian, +//! most-significant byte first). DPDK +//! handles the conversion internally during trie construction. +//! +//! The wrapper is developed and tested on little-endian targets (x86_64, aarch64). Big-endian +//! targets are not currently exercised; see [`Rule::validate`][rule::Rule::validate] for the +//!
soundness guards that catch the most common endian-related footgun. +//! +//! # `mask_range` interpretation +//! +//! The meaning of the `mask_range` value inside an [`AclField`] depends on the +//! [`FieldType`]: +//! +//! | [`FieldType`] | `mask_range` meaning | +//! |----------------------------------------|----------------------| +//! | [`FieldType::Mask`] | **prefix length** -- number of most-significant bits to compare (e.g. `32` for exact match, `24` for `/24`) | +//! | [`FieldType::Range`] | **upper bound** of the range (`value` is the lower bound) | +//! | [`FieldType::Bitmask`] | **bitmask** applied to input before comparison | +//! +//! # Example +//! +//! ```no_run +//! # fn main() -> Result<(), Box> { +//! use core::num::NonZero; +//! +//! use dataplane_dpdk::acl::*; +//! use dataplane_dpdk::socket::SocketId; +//! +//! // Define a simple 5-tuple IPv4 ACL layout (5 fields). +//! const NUM_FIELDS: usize = 5; +//! +//! let field_defs: [FieldDef; NUM_FIELDS] = [ +//! FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), +//! FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 2), +//! FieldDef::new(FieldType::Mask, FieldSize::Four, 2, 2, 6), +//! FieldDef::new(FieldType::Range, FieldSize::Two, 3, 3, 10), +//! FieldDef::new(FieldType::Range, FieldSize::Two, 4, 3, 12), +//! ]; +//! +//! // 1. Create a context (Configuring state). The build config is +//! // supplied up front so that add_rules can validate each rule's +//! // field values against the layout. +//! let params = AclCreateParams::::new( +//! "my_acl", +//! SocketId::ANY, +//! NonZero::new(1024).unwrap(), +//! )?; +//! let build_cfg = AclBuildConfig::new(1, field_defs, 0)?; +//! let mut ctx = AclContext::::new(params, build_cfg)?; +//! +//! // 2. Add rules -- Rule<5> is enforced by the type system. +//! let rule = Rule::new( +//! RuleData { +//! category_mask: CategoryMask::new(1)?, +//! priority: Priority::new(1)?, +//! userdata: NonZero::new(42).unwrap(), +//! }, +//! [ +//! 
AclField::from_u8(6, 0xFF), // TCP protocol (bitmask) +//! AclField::from_u32(0xC0A80100, 24), // 192.168.1.0/24 (prefix length) +//! AclField::from_u32(0x0A000100, 24), // 10.0.1.0/24 (prefix length) +//! AclField::from_u16(0, u16::MAX), // any src port (range) +//! AclField::from_u16(80, 80), // dst port 80 (range) +//! ], +//! ); +//! ctx.add_rules(&[rule])?; +//! +//! // 3. Build (transitions Configuring -> Built; uses the config from new()). +//! let ctx = ctx.build().map_err(|f| f.error)?; +//! +//! // 4. Classify packets (hot path, &self, thread-safe). +//! // `classify` is `unsafe`: each pointer in `packet_ptrs` must reference +//! // a buffer valid for at least `ctx.build_config().min_input_size()` +//! // bytes -- DPDK loads 4 bytes per `input_index` group, so the safety +//! // contract is wider than `max(offset + size)`. +//! let packet_ptrs: Vec<*const u8> = Vec::new(); // populated by caller +//! let mut results = vec![0u32; packet_ptrs.len()]; +//! unsafe { ctx.classify(&packet_ptrs, &mut results, 1)?; } +//! +//! // results[i] == 0 -> no match +//! // results[i] == 42 -> matched our rule +//! # Ok(()) +//! # } +//! ``` +//! +//! # Modules +//! +//! | Module | Contents | +//! |--------|----------| +//! | [`classify`] | [`ClassifyAlgorithm`] -- SIMD backend selection | +//! | [`config`] | [`AclCreateParams`], [`AclBuildConfig`] -- validated configuration types | +//! | [`context`] | [`AclContext`] -- the typestate context (create, add, build, classify) | +//! | [`error`] | Dedicated error types for each fallible operation | +//! | [`field`] | [`FieldDef`], [`FieldType`], [`FieldSize`] -- rule field layout | +//! 
| [`rule`] | [`Rule`], [`RuleData`], [`AclField`] -- rule value types | + +pub mod classify; +pub mod config; +pub mod context; +pub mod error; +pub mod field; +pub mod rule; + +// --------------------------------------------------------------------------- +// Convenience re-exports +// --------------------------------------------------------------------------- + +// Context & typestate markers +pub use context::{AclBuildFailure, AclContext, Built, Configuring}; + +// Configuration +pub use config::{AclBuildConfig, AclCreateParams, InvalidAclBuildConfig}; +pub use config::{MAX_CATEGORIES, MAX_FIELDS, RESULTS_MULTIPLIER}; + +// Rules & fields +pub use field::{FieldDef, FieldSize, FieldType}; +pub use rule::{ + AclField, CategoryMask, InvalidCategoryMask, InvalidPriority, Priority, Rule, RuleData, +}; + +// Classification algorithm +pub use classify::{ClassifyAlgorithm, UnknownClassifyAlgorithm}; + +// Errors +pub use error::{ + AclAddRulesError, AclBuildError, AclClassifyError, AclCreateError, AclSetAlgorithmError, + InvalidAclName, InvalidRule, +}; + +// Module-level utilities +pub use context::dump_all_contexts; + +/// End-to-end integration tests for the ACL wrapper, exercising real +/// `rte_acl_*` calls against a live EAL. +/// +/// # EAL configuration (shared by every test in this module) +/// +/// All tests initialize EAL via [`start_eal`][self::tests::start_eal], which +/// passes a fixed set of flags plus two dynamic values: +/// +/// - `--no-huge --in-memory` -- back EAL with anonymous memory instead of +/// hugetlbfs. Keeps the tests runnable on any host without manual hugepage +/// configuration. +/// - `--lcores 0@({allowed_cpus})` -- a single logical lcore (the main), +/// floated across whatever physical CPUs `sched_getaffinity` reports as +/// available to the process. 
No workers means +/// `rte_eal_mp_remote_launch` has no per-worker readiness flag to read, so +/// we sidestep a benign-but-flagged data race that ThreadSanitizer reports +/// against DPDK's lcore startup, and we also avoid spawning unused worker +/// threads. Floating (instead of pinning to physical CPU 0) keeps the +/// tests honest about cgroups, taskset, and container CPU restrictions. +/// - `--file-prefix {eal_id}` -- a per-init unique identifier so that +/// concurrent forked test processes do not fight over the EAL runtime +/// configuration namespace. Necessary alongside `--in-memory` because EAL +/// still creates per-process control state in the runtime dir. +/// - `--no-pci --no-telemetry --no-shconf --no-hpet` -- disable everything we +/// do not need so the tests start quickly and have no shared-config files +/// to clean up. +/// - `--iova-mode=va` -- also passed by `start_eal`. NOTE(review): the reason is +/// not recorded in this module; presumably physical-address IOVA is not +/// usable without hugepages -- confirm and document. +/// +/// # Running once per process +/// +/// `eal::init` may only be called once per process. Every test in this +/// module funnels through the [`EAL`][self::tests::EAL] `OnceLock`, so +/// the init happens exactly once regardless of how the harness schedules +/// tests: nextest's per-test process fork (the workspace default) runs +/// the lazy init once per fork; a single-process runner (`cargo test -- +/// --test-threads=1` or an in-process parallel harness) runs it once for +/// the lifetime of the process. +/// +/// # Running locally +/// +/// ```text +/// just setup-roots # rebuild DPDK + wrapper +/// # re-enter `nix-shell` so DATAPLANE_SYSROOT picks up the new sysroot +/// cargo nextest run -p dataplane-dpdk acl::tests +/// ``` +#[cfg(test)] +mod tests { + use core::num::NonZero; + + use concurrency::sync::OnceLock; + + use crate::acl::*; + use crate::eal::Eal; + use crate::socket::SocketId; + + /// Number of fields used by all lifecycle tests in this module. + const NUM_FIELDS: usize = 2; + + /// Process-wide EAL initialized on first use, shared by every test. + /// + /// `eal::init` may only be called once per process.
Nextest's default + /// per-test process forking makes a per-test `init` trivially safe + /// (each forked process re-initializes EAL exactly once), but a + /// single-process test runner -- `cargo test -- --test-threads=1`, an + /// in-process parallel harness, or any future configuration that drops + /// the fork -- would call init twice and fail. Funneling every test + /// through this lazy [`OnceLock`] makes the tests correct under both + /// modes: per-process forking initializes once per fork (cheap), + /// in-process initializes once for the lifetime of the process. + /// + /// The `Eal` value is intentionally leaked into the static for the + /// lifetime of the process; DPDK has no clean teardown path, and the + /// `Eal` Drop would (per [`crate::eal::init`]) be unable to free DPDK + /// allocations through the system allocator after the allocator swap. + static EAL: OnceLock = OnceLock::new(); + + /// Lazily initialize EAL on first call. + /// + /// Each test calls this in place of `eal::init`; subsequent calls + /// return the shared `&'static Eal` without re-initializing DPDK. + fn start_eal() -> &'static Eal { + // DPDK pins lcores, but that is generally not what we actually want in a test environment. + // Instead, we need to allocate just lcore 0 (main) and pin it to "everything we legally have access to." + // + // `allowed_cpus` renders the calling process's affinity mask as a comma-separated + // list of CPU indices (e.g. `0,1,2`); an index whose `is_set` query fails is treated as not allowed. + fn allowed_cpus() -> String { + use nix::sched::{CpuSet, sched_getaffinity}; + use nix::unistd::Pid; + let set = sched_getaffinity(Pid::from_raw(0)).expect("sched_getaffinity"); + (0..CpuSet::count()) + .filter(|&i| set.is_set(i).unwrap_or(false)) + .map(|x| x.to_string()) + .collect::>() + .join(",") + } + // concurrent executions of DPDK EAL can fight over allocations and file resources. + // You can prevent that with a unique prefix on the hugepage files it allocates (if any).
+ let eal_id = format!("{}", id::Id::::new()); + let core_pinning = format!("0@({})", allowed_cpus()); + // EAL arguments used the first time EAL is initialized in this process. + // The list is rebuilt on every call to `start_eal`, but it is only consumed + // by the `get_or_init` closure below, i.e. on the call that performs the init. + let args: &[&str] = &[ + "--no-huge", + "--no-pci", + "--in-memory", + "--no-telemetry", + "--no-shconf", + "--no-hpet", + "--iova-mode=va", + "--file-prefix", + &eal_id, + // Restrict EAL to a single lcore (the main). Without workers, + // rte_eal_mp_remote_launch has no readiness flags to read and there is + // no DPDK-internal init race for ThreadSanitizer to flag. Also avoids + // spawning unused worker threads. + // + // The `0@({allowed_cpus})` form means "logical lcore 0, floated across + // the listed physical CPUs": DPDK schedules lcore 0 onto any of + // them rather than pinning to a single CPU. Floating instead of + // pinning keeps the tests honest about cgroups, taskset, and + // container affinity restrictions. + "--lcores", + &core_pinning, + ]; + + EAL.get_or_init(|| super::super::eal::init(args.iter().copied())) + } + + /// Standard field layout used by the lifecycle tests. + /// + /// DPDK ACL requires the first field in the rule definition to be one byte + /// long (it is consumed during trie setup). All subsequent fields must be + /// grouped into sets of 4 consecutive bytes via `input_index`. + fn standard_field_defs() -> [FieldDef; NUM_FIELDS] { + [ + // Field 0: 1-byte entry at offset 0 (required by DPDK to be 1 byte). + FieldDef::new(FieldType::Bitmask, FieldSize::One, 0, 0, 0), + // Field 1: 4-byte Mask field at offset 4, input_index 1. + FieldDef::new(FieldType::Mask, FieldSize::Four, 1, 1, 4), + ] + } + + /// Build a rule that exact-matches the given 32-bit value in field 1. + /// + /// `userdata` becomes the classify result for matching inputs.
+ fn exact_match_rule(value: u32, userdata: u32) -> Rule { + Rule::new( + RuleData { + category_mask: CategoryMask::new(1).unwrap(), + priority: Priority::new(1).unwrap(), + userdata: NonZero::new(userdata).expect("userdata must be non-zero"), + }, + [ + // Wildcard entry byte: field 0 is FieldType::Bitmask + // (per standard_field_defs). mask = 0 makes the + // predicate `(input & 0) == 0`, which is trivially true + // for any input -- so this field matches any byte at + // offset 0. + AclField::from_u8(0, 0), + // Field 1 is FieldType::Mask; mask_range is interpreted + // as a prefix length, so 32 means "compare all 32 bits". + AclField::from_u32(value, 32), + ], + ) + } + + /// Build an 8-byte input buffer carrying `value` at offset 4 in network byte + /// order, suitable for the field layout returned by [`standard_field_defs`]. + fn input_buffer(value: u32) -> [u8; 8] { + let mut buf = [0u8; 8]; + buf[4..8].copy_from_slice(&value.to_be_bytes()); + buf + } + + /// Build the default `AclBuildConfig` used across the lifecycle tests + /// (`num_categories = 1`, the standard 2-field layout, no max_size). + fn standard_build_config() -> AclBuildConfig { + AclBuildConfig::new(1, standard_field_defs(), 0).expect("build config") + } + + /// End-to-end classify smoke test: build a tiny ACL context, run a real + /// `rte_acl_classify` call, and verify the match / no-match outcomes. + /// See the [module-level docs](self) for the EAL setup that applies to + /// every test here. 
+ #[test] + fn classify_smoke() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "test_acl", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + + ctx.add_rules(&[exact_match_rule(0xDEAD_BEEF, 1)]) + .expect("add rules"); + + let ctx = ctx.build().map_err(|f| f.error).expect("build"); + + let matching = input_buffer(0xDEAD_BEEF); + let non_matching = input_buffer(0); + + let data_ptrs: Vec<*const u8> = vec![matching.as_ptr(), non_matching.as_ptr()]; + let mut results = vec![0u32; 2]; + // SAFETY: each buffer is 8 bytes. `classify` needs every pointer to be + // readable for `build_config().min_input_size()` bytes, because DPDK + // loads 4 bytes per `input_index` group rather than just + // max(offset + size). The standard layout has two groups, starting at + // offsets 0 and 4, so the widest 4-byte load ends at byte 8 -- within + // the buffer. + unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify"); + + assert_eq!(results[0], 1, "expected match for 0xDEADBEEF"); + assert_eq!(results[1], 0, "expected no match for 0x00000000"); + } + + /// Reset round-trip: build, classify, reset back to Configuring, swap + /// in a new rule, rebuild (no config supplied -- it lives on the + /// context), and verify the new rule's userdata wins. Also asserts + /// that the build config survives the reset. + #[test] + fn reset_round_trip() { + let _eal = start_eal(); + + let original_cfg = standard_build_config(); + let params = AclCreateParams::::new( + "reset_round_trip", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, original_cfg.clone()).expect("new context"); + + // First build cycle: match 0xAAAAAAAA -> userdata 1.
+ ctx.add_rules(&[exact_match_rule(0xAAAA_AAAA, 1)]) + .expect("add rules (first)"); + let ctx = ctx.build().map_err(|f| f.error).expect("build (first)"); + assert_eq!( + ctx.build_config(), + &original_cfg, + "Built context retains the config supplied to new()", + ); + + let first_input = input_buffer(0xAAAA_AAAA); + let data_ptrs: Vec<*const u8> = vec![first_input.as_ptr()]; + let mut results = vec![0u32; 1]; + // SAFETY: see classify_smoke -- same 8-byte buffer / 8-byte layout. + unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify (first)"); + assert_eq!(results[0], 1, "first build should match 0xAAAAAAAA"); + + // Reset back to Configuring (config carries through) and load a + // different rule. + let mut ctx = ctx.reset(); + assert_eq!( + ctx.build_config(), + &original_cfg, + "reset() preserves the build config across Built -> Configuring", + ); + ctx.add_rules(&[exact_match_rule(0xBBBB_BBBB, 2)]) + .expect("add rules (second)"); + let ctx = ctx.build().map_err(|f| f.error).expect("build (second)"); + + let second_input = input_buffer(0xBBBB_BBBB); + let stale_input = input_buffer(0xAAAA_AAAA); + let data_ptrs: Vec<*const u8> = vec![second_input.as_ptr(), stale_input.as_ptr()]; + let mut results = vec![0u32; 2]; + // SAFETY: see classify_smoke. + unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify (second)"); + assert_eq!( + results[0], 2, + "second build should match 0xBBBBBBBB with userdata 2" + ); + assert_eq!(results[1], 0, "second build must not retain the first rule"); + } + + /// `add_rules` rejects a rule whose [`FieldType::Mask`] field carries a + /// prefix length larger than the field's bit width. Without this + /// wrapper-side check, DPDK's `RTE_ACL_MASKLEN_TO_BITMASK` would + /// perform a C shift by an out-of-range amount (UB). 
+ #[test] + fn add_rules_rejects_out_of_range_prefix_length() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "prefix_len_validate", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + + // Field 1 in standard_field_defs is a 4-byte Mask field, so the + // maximum legal prefix length is 32. 33 is out of range. + let bad_rule: Rule = Rule::new( + RuleData { + category_mask: CategoryMask::new(1).unwrap(), + priority: Priority::new(1).unwrap(), + userdata: NonZero::new(1).unwrap(), + }, + [ + AclField::from_u8(0, 0), + AclField::from_u32(0, 33), // prefix_length = 33, max = 32 + ], + ); + let err = ctx + .add_rules(&[bad_rule]) + .expect_err("out-of-range prefix length must be rejected"); + assert!( + matches!( + err, + AclAddRulesError::InvalidRule { + rule_index: 0, + source: error::InvalidRule::PrefixLengthOutOfRange { + prefix_length: 33, + max_bits: 32, + .. + }, + } + ), + "expected PrefixLengthOutOfRange, got {err:?}", + ); + } + + /// `set_default_algorithm` happy path: build, switch to a specific + /// algorithm, and classify. Uses `Default` which is always supported. + #[test] + fn set_default_algorithm_then_classify() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "set_algo", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + ctx.add_rules(&[exact_match_rule(0xCAFE_BABE, 7)]) + .expect("add rules"); + let mut ctx = ctx.build().map_err(|f| f.error).expect("build"); + + // `Default` is always available on any CPU DPDK runs on. + ctx.set_default_algorithm(ClassifyAlgorithm::Default) + .expect("set_default_algorithm"); + + let buf = input_buffer(0xCAFE_BABE); + let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()]; + let mut results = vec![0u32; 1]; + // SAFETY: see classify_smoke. 
+ unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify"); + assert_eq!(results[0], 7); + } + + /// `classify` must reject `categories` values that would overflow DPDK's + /// per-thread runtime arrays sized to `RTE_ACL_MAX_CATEGORIES`, even when + /// the user's `results` slice is generous enough to satisfy the + /// per-element length check. + #[test] + fn classify_categories_validated_before_ffi() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "cat_validation", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + ctx.add_rules(&[exact_match_rule(0xAAAA_AAAA, 1)]) + .expect("add rules"); + let ctx = ctx.build().map_err(|f| f.error).expect("build"); + + let buf = input_buffer(0xAAAA_AAAA); + let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()]; + + // results slice large enough to pass the length check, but categories + // out of range -- must still be rejected. + let mut results = vec![0u32; 64]; + + // categories = 0 + // SAFETY: see classify_smoke. + let r = unsafe { ctx.classify(&data_ptrs, &mut results, 0) }; + assert!(matches!(r, Err(AclClassifyError::InvalidArgs))); + + // categories > MAX_CATEGORIES (= 16) + // SAFETY: see classify_smoke. + let r = unsafe { ctx.classify(&data_ptrs, &mut results, MAX_CATEGORIES + 1) }; + assert!(matches!(r, Err(AclClassifyError::InvalidArgs))); + + // categories > 1 but not a multiple of RESULTS_MULTIPLIER (= 4) + // SAFETY: see classify_smoke. + let r = unsafe { ctx.classify(&data_ptrs, &mut results, 3) }; + assert!(matches!(r, Err(AclClassifyError::InvalidArgs))); + } + + /// Creating a second [`AclContext`] with a name already registered in + /// DPDK's global ACL list must fail with [`AclCreateError::AlreadyExists`] + /// rather than silently aliasing the first context (which would + /// double-free on drop). 
+ #[test] + fn duplicate_name_rejected() { + let _eal = start_eal(); + + let params_a = AclCreateParams::::new( + "dup_name", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let _ctx_a = + AclContext::::new(params_a, standard_build_config()).expect("first new"); + + let params_b = AclCreateParams::::new( + "dup_name", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params (dup)"); + let err = AclContext::::new(params_b, standard_build_config()) + .expect_err("second new with same name must fail"); + assert!( + matches!(err, AclCreateError::AlreadyExists { ref name } if name == "dup_name"), + "expected AlreadyExists, got {err:?}", + ); + } + + /// Recovery after `add_rules` overflows `max_rule_num`: the context must + /// remain usable. We submit one rule successfully, then submit more rules + /// than the remaining capacity allows, expect the error, and finally build + /// and classify against the first rule. + #[test] + fn add_rules_after_overflow_failure() { + let _eal = start_eal(); + + // `max_rule_num` of 1: a second add_rules call with any rule will + // overflow. + let params = AclCreateParams::::new( + "overflow_recover", + SocketId::ANY, + NonZero::new(1).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + + ctx.add_rules(&[exact_match_rule(0x1111_1111, 1)]) + .expect("first add_rules should succeed"); + + // Attempting to add another rule must fail: capacity is exhausted. + // DPDK signals "no room left in the rule list" with -ENOMEM, which + // the wrapper maps to AclAddRulesError::OutOfMemory. Pin the variant + // so a future change in mapping or DPDK's behaviour surfaces as a + // test failure rather than silently passing through. 
+ let extra = exact_match_rule(0x2222_2222, 2); + let err = ctx + .add_rules(&[extra]) + .expect_err("second add_rules should fail when over capacity"); + assert!( + matches!(err, AclAddRulesError::OutOfMemory), + "expected OutOfMemory from capacity exhaustion, got {err:?}", + ); + + // Context must still be usable: build + classify against the first rule. + let ctx = ctx + .build() + .map_err(|f| f.error) + .expect("build after recoverable add_rules failure"); + + let buf = input_buffer(0x1111_1111); + let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()]; + let mut results = vec![0u32; 1]; + // SAFETY: see classify_smoke. + unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify"); + assert_eq!(results[0], 1); + } + + /// Build failure recovery: when `build()` fails, the wrapper returns + /// the original `Configuring` context inside `AclBuildFailure`. The + /// caller must be able to keep using it (add rules, retry). We force + /// the failure by calling `build()` with no rules added (DPDK rejects + /// `num_rules == 0` with `-EINVAL`). + #[test] + fn build_failure_returns_usable_context() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "build_failure_recovery", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + + // First build with zero rules must fail. + let failure = ctx.build().expect_err("build() with no rules must fail"); + assert!( + matches!(failure.error, AclBuildError::InvalidConfig), + "expected InvalidConfig, got {:?}", + failure.error, + ); + + // Recover the context, add a rule, build again -- must succeed. 
+ let mut ctx = failure.context; + ctx.add_rules(&[exact_match_rule(0xDEAD_BEEF, 1)]) + .expect("add rules after recovery"); + let ctx = ctx + .build() + .map_err(|f| f.error) + .expect("second build succeeds"); + + let buf = input_buffer(0xDEAD_BEEF); + let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()]; + let mut results = vec![0u32; 1]; + // SAFETY: see classify_smoke. + unsafe { ctx.classify(&data_ptrs, &mut results, 1) }.expect("classify"); + assert_eq!(results[0], 1); + } + + /// `add_rules` rejects a rule whose `category_mask` has bits set at + /// positions `>= config.num_categories()`. DPDK would silently mask + /// off those bits at build time, narrowing the rule's intended + /// category set; we surface this at `add_rules` time instead. + #[test] + fn add_rules_rejects_category_mask_beyond_num_categories() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "cat_mask_validate", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + // standard_build_config uses num_categories = 1, so only bit 0 is + // legal. Build a rule with bit 1 also set. 
+ let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + + let bad_rule: Rule = Rule::new( + RuleData { + category_mask: CategoryMask::new(0b11).unwrap(), + priority: Priority::new(1).unwrap(), + userdata: NonZero::new(1).unwrap(), + }, + [AclField::from_u8(0, 0), AclField::from_u32(0xAAAA_AAAA, 32)], + ); + let err = ctx + .add_rules(&[bad_rule]) + .expect_err("category_mask with bits beyond num_categories must be rejected"); + assert!( + matches!( + err, + AclAddRulesError::InvalidRule { + rule_index: 0, + source: error::InvalidRule::CategoryMaskExceedsNumCategories { + category_mask: 0b11, + num_categories: 1, + extra_bits: 0b10, + }, + } + ), + "expected CategoryMaskExceedsNumCategories, got {err:?}", + ); + } + + /// Concurrent classify under `Arc>>`: spawns + /// several worker threads, each calling + /// [`AclContext::classify`][crate::acl::AclContext::classify] in a + /// tight loop, and verifies every thread sees the correct match. + /// Exercises the per-state `Sync` impl on [`Built`] and ensures + /// the wrapper's "share across classification threads" claim isn't + /// vacuous. Test runs with N=4 workers and M=1000 iterations each + /// to give the OS scheduler a chance to interleave. 
+ #[test] + fn classify_concurrent_arc_shared() { + use std::sync::Arc; + use std::thread; + + let _eal = start_eal(); + + const WORKERS: usize = 4; + const ITERS_PER_WORKER: usize = 1000; + + let params = AclCreateParams::::new( + "classify_concurrent", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + ctx.add_rules(&[exact_match_rule(0xDEAD_BEEF, 1)]) + .expect("add rules"); + let ctx: Arc>> = + Arc::new(ctx.build().map_err(|f| f.error).expect("build")); + + let handles: Vec<_> = (0..WORKERS) + .map(|worker| { + let ctx = Arc::clone(&ctx); + thread::spawn(move || { + // Each worker owns its own buffers; classify is the + // only place we share state across threads. + let matching = input_buffer(0xDEAD_BEEF); + let non_matching = input_buffer(0); + for _ in 0..ITERS_PER_WORKER { + let data_ptrs: Vec<*const u8> = + vec![matching.as_ptr(), non_matching.as_ptr()]; + let mut results = vec![0u32; 2]; + // SAFETY: see classify_smoke. + unsafe { ctx.classify(&data_ptrs, &mut results, 1) } + .unwrap_or_else(|e| panic!("worker {worker}: classify failed: {e:?}")); + assert_eq!( + results[0], 1, + "worker {worker}: expected match for 0xDEADBEEF", + ); + assert_eq!(results[1], 0, "worker {worker}: expected no match for 0",); + } + }) + }) + .collect(); + for h in handles { + h.join().expect("worker thread panicked"); + } + } + + /// `classify_with_algorithm` with a non-`Default` algorithm: locks in + /// the special-casing in [`AclContext::classify_with_algorithm`] by + /// dispatching through the `Scalar` variant (always available on every + /// CPU DPDK runs on) and verifying classification still works. 
+ #[test] + fn classify_with_algorithm_scalar() { + let _eal = start_eal(); + + let params = AclCreateParams::::new( + "classify_alg_scalar", + SocketId::ANY, + NonZero::new(16).unwrap(), + ) + .expect("create params"); + let mut ctx = + AclContext::::new(params, standard_build_config()).expect("new context"); + ctx.add_rules(&[exact_match_rule(0xFEED_FACE, 9)]) + .expect("add rules"); + let ctx = ctx.build().map_err(|f| f.error).expect("build"); + + let buf = input_buffer(0xFEED_FACE); + let data_ptrs: Vec<*const u8> = vec![buf.as_ptr()]; + let mut results = vec![0u32; 1]; + // SAFETY: see classify_smoke. + unsafe { + ctx.classify_with_algorithm(&data_ptrs, &mut results, 1, ClassifyAlgorithm::Scalar) + } + .expect("classify_with_algorithm(Scalar)"); + assert_eq!(results[0], 9); + } +} diff --git a/dpdk/src/acl/rule.rs b/dpdk/src/acl/rule.rs new file mode 100644 index 0000000000..198914eea3 --- /dev/null +++ b/dpdk/src/acl/rule.rs @@ -0,0 +1,1095 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! ACL rule types. +//! +//! These types provide safe, `#[repr(C)]` wrappers around the DPDK ACL rule structures. +//! The key types are: +//! +//! - [`RuleData`] -- rule metadata (category mask, priority, user data). +//! - [`AclField`] -- a single field value with its mask or range bound. +//! - [`Rule`]`` -- a complete rule comprising [`RuleData`] followed by `N` [`AclField`] entries. +//! +//! # Layout guarantee +//! +//! [`Rule`]`` is `#[repr(C)]` and has an identical memory layout to the struct produced by +//! DPDK's `RTE_ACL_RULE_DEF(name, N)` C macro. This means a `*const Rule` can be safely cast +//! to `*const rte_acl_rule` when calling [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules], +//! provided the context was created with `rule_size = size_of::>()`. +//! +//! # Byte order +//! +//! All fields in [`Rule`] structures are expected to be in **host byte order**, as documented by +//! DPDK. 
This is in contrast to the *input data buffers* passed to +//! [`rte_acl_classify`][dpdk_sys::rte_acl_classify], which must be in **network byte order** +//! (MSB). + +use core::fmt; +use core::mem; +use core::num::NonZero; + +// --------------------------------------------------------------------------- +// Priority +// --------------------------------------------------------------------------- + +/// DPDK ACL rule priority bounds. +/// +/// A result of `0` from classification means "no match", so valid user data values and priorities +/// must respect these bounds. +pub mod priority { + /// Minimum valid rule priority (inclusive). + /// + /// Corresponds to + /// [`RTE_ACL_MIN_PRIORITY`][dpdk_sys::_bindgen_ty_4::RTE_ACL_MIN_PRIORITY]. + pub const MIN: i32 = dpdk_sys::_bindgen_ty_4::RTE_ACL_MIN_PRIORITY as i32; + + /// Maximum valid rule priority (inclusive). + /// + /// Corresponds to + /// [`RTE_ACL_MAX_PRIORITY`][dpdk_sys::_bindgen_ty_4::RTE_ACL_MAX_PRIORITY]. + pub const MAX: i32 = dpdk_sys::_bindgen_ty_4::RTE_ACL_MAX_PRIORITY as i32; +} + +/// A validated ACL rule priority. +/// +/// The inner [`NonZero`] is guaranteed to fall in the closed range +/// \[[`priority::MIN`], [`priority::MAX`]\] (DPDK's `RTE_ACL_MIN_PRIORITY` is +/// `1`, so zero is unreachable). `#[repr(transparent)]` means this is +/// layout-compatible with the underlying `i32` field of +/// [`rte_acl_rule_data`][dpdk_sys::rte_acl_rule_data], and `Option` +/// is niche-optimised down to 4 bytes -- matching the +/// [`userdata: NonZero`](RuleData) treatment. +/// +/// Construct via [`new`][Priority::new] (which is `const fn`, so it works in +/// `const` contexts at the cost of an `?` or `.unwrap()`). The +/// [`MIN`][Priority::MIN] and [`MAX`][Priority::MAX] constants are pre-validated +/// shorthand for the range endpoints. 
+#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Priority(NonZero); + +/// Error returned when [`Priority::new`] is given an out-of-range value. +#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] +#[error( + "ACL priority {value} out of range [{}, {}]", + priority::MIN, + priority::MAX +)] +pub struct InvalidPriority { + /// The out-of-range value the caller supplied. + pub value: i32, +} + +impl Priority { + // Both constants below evaluate at compile time. `NonZero::new` + + // `.unwrap()` in a const context surfaces as a const-eval error + // (not a runtime panic) if the value happens to be zero -- which + // would itself be a compile-time bug. Clippy's + // `useless_nonzero_new_unchecked` lint prefers this form over + // `NonZero::new_unchecked` for const items. + + /// Smallest valid priority value (equal to [`priority::MIN`] = DPDK's + /// `RTE_ACL_MIN_PRIORITY`, currently `1`). + pub const MIN: Self = match NonZero::new(priority::MIN) { + Some(nz) => Self(nz), + // unreachable in const context: priority::MIN is a positive i32 + // (verified at compile time); reaching this arm would be a + // compile error, not a runtime panic. + None => panic!("priority::MIN must be non-zero"), + }; + + /// Largest valid priority value (equal to [`priority::MAX`] = DPDK's + /// `RTE_ACL_MAX_PRIORITY`). + pub const MAX: Self = match NonZero::new(priority::MAX) { + Some(nz) => Self(nz), + None => panic!("priority::MAX must be non-zero"), + }; + + /// Construct a `Priority` from a raw value. + /// + /// # Errors + /// + /// Returns [`InvalidPriority`] when `value` is outside + /// \[[`priority::MIN`], [`priority::MAX`]\]. 
+ pub const fn new(value: i32) -> Result { + if value < priority::MIN || value > priority::MAX { + return Err(InvalidPriority { value }); + } + // priority::MIN == 1 (DPDK's RTE_ACL_MIN_PRIORITY), so the + // range check above guarantees value >= 1 and therefore != 0; + // the `unreachable!()` arm is dead. Preferred over + // `unsafe { NonZero::new_unchecked }` so a wrong invariant + // faults loudly instead of being undefined behaviour. + match NonZero::new(value) { + Some(nz) => Ok(Self(nz)), + None => unreachable!(), + } + } + + /// Get the raw `i32`. + #[must_use] + pub const fn get(self) -> i32 { + self.0.get() + } +} + +impl fmt::Display for Priority { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl TryFrom for Priority { + type Error = InvalidPriority; + fn try_from(value: i32) -> Result { + Self::new(value) + } +} + +// --------------------------------------------------------------------------- +// CategoryMask +// --------------------------------------------------------------------------- + +/// A validated category bitmask for an ACL rule. +/// +/// Each bit corresponds to one category (bit `i` enables category `i`). DPDK +/// supports up to [`RTE_ACL_MAX_CATEGORIES`][dpdk_sys::RTE_ACL_MAX_CATEGORIES] +/// categories, so only the low `RTE_ACL_MAX_CATEGORIES` bits may be set. +/// +/// `#[repr(transparent)]` and inner [`NonZero`] make `Option` +/// niche-optimised to 4 bytes and rule out the zero-mask case (a rule with no +/// categories enabled can never match). The bit-range check enforces the +/// type-level invariant that no out-of-range categories are referenced. +/// +/// A successful build with `num_categories = k` does not imply `k = 32`; the +/// per-build category count is checked by DPDK at `rte_acl_build` time. This +/// newtype enforces the upper bound common to all builds. 
+#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CategoryMask(NonZero); + +/// Error returned when [`CategoryMask::new`] is given an invalid bitmask. +#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] +pub enum InvalidCategoryMask { + /// The mask is zero -- the rule would match no category. + #[error("category mask is zero")] + Zero, + /// The mask has bits set above `RTE_ACL_MAX_CATEGORIES`. + #[error( + "category mask {value:#010x} has bits set above bit {} \ + (RTE_ACL_MAX_CATEGORIES = {})", + dpdk_sys::RTE_ACL_MAX_CATEGORIES - 1, + dpdk_sys::RTE_ACL_MAX_CATEGORIES + )] + OutOfRange { + /// The out-of-range value the caller supplied. + value: u32, + }, +} + +impl CategoryMask { + /// Bit mask covering all categories DPDK supports: bits 0 through + /// `RTE_ACL_MAX_CATEGORIES - 1` inclusive. + pub const ALLOWED_BITS: u32 = { + // Avoid (1 << 32) overflow when MAX_CATEGORIES is 32; (1u32 << 32) is UB + // in C and a debug-panic in Rust, so guard. + let max = dpdk_sys::RTE_ACL_MAX_CATEGORIES; + if max >= 32 { + u32::MAX + } else { + (1u32 << max) - 1 + } + }; + + /// Construct a `CategoryMask` from a raw `u32`. + /// + /// # Errors + /// + /// - [`InvalidCategoryMask::Zero`] if `value == 0`. + /// - [`InvalidCategoryMask::OutOfRange`] if any bit above + /// `RTE_ACL_MAX_CATEGORIES - 1` is set. + pub const fn new(value: u32) -> Result { + if value == 0 { + return Err(InvalidCategoryMask::Zero); + } + if value & !Self::ALLOWED_BITS != 0 { + return Err(InvalidCategoryMask::OutOfRange { value }); + } + // The `value == 0` check above guarantees value != 0, so the + // `unreachable!()` arm is dead. Preferred over + // `unsafe { NonZero::new_unchecked }` so a wrong invariant + // faults loudly instead of being undefined behaviour. + match NonZero::new(value) { + Some(nz) => Ok(Self(nz)), + None => unreachable!(), + } + } + + /// The raw `u32` value. 
+ #[must_use] + pub const fn get(self) -> u32 { + self.0.get() + } +} + +impl fmt::Display for CategoryMask { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:#010x}", self.0.get()) + } +} + +impl TryFrom for CategoryMask { + type Error = InvalidCategoryMask; + fn try_from(value: u32) -> Result { + Self::new(value) + } +} + +// --------------------------------------------------------------------------- +// RuleData +// --------------------------------------------------------------------------- + +/// Metadata associated with an ACL rule. +/// +/// This is the safe Rust equivalent of [`rte_acl_rule_data`][dpdk_sys::rte_acl_rule_data] and has +/// an identical `#[repr(C)]` memory layout. +/// +/// # Important: `userdata` must be non-zero +/// +/// DPDK uses `userdata == 0` as a sentinel meaning "no match". If you set `userdata` to `0`, +/// the rule will effectively never be reported as matching. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct RuleData { + /// Bitmask of categories this rule applies to. + /// + /// Each bit corresponds to one category (bit `i` enables category `i`). + /// Validated at construction; see [`CategoryMask::new`]. + pub category_mask: CategoryMask, + + /// Rule priority. Higher numeric value means higher priority. + /// + /// When multiple rules match a given input for the same category, the rule with the highest + /// priority wins. Validated to be in the range + /// \[[`priority::MIN`], [`priority::MAX`]\] at construction; see [`Priority::new`]. + pub priority: Priority, + + /// Opaque value returned to the caller on match. + /// + /// **Must be non-zero.** A classification result of `0` indicates that no rule matched. + pub userdata: NonZero, +} + +// Compile-time layout assertions against the raw DPDK type. 
+const _: () = { + assert!( + mem::size_of::() == mem::size_of::(), + "RuleData size must match rte_acl_rule_data" + ); + assert!( + mem::align_of::() == mem::align_of::(), + "RuleData alignment must match rte_acl_rule_data" + ); +}; + +impl fmt::Display for RuleData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "RuleData {{ category_mask: {}, priority: {}, userdata: {} }}", + self.category_mask, self.priority, self.userdata, + ) + } +} + +// --------------------------------------------------------------------------- +// AclField +// --------------------------------------------------------------------------- + +/// A single field value within an ACL rule. +/// +/// This is the safe Rust equivalent of [`rte_acl_field`][dpdk_sys::rte_acl_field] and has an +/// identical `#[repr(C)]` memory layout. +/// +/// The interpretation of the value and mask/range depends on the +/// [`FieldType`][super::field::FieldType] specified in the corresponding +/// [`FieldDef`][super::field::FieldDef]: +/// +/// | [`FieldType`][super::field::FieldType] | value | mask/range | +/// |----------------------------------------|------------|--------------------| +/// | [`Mask`][super::field::FieldType::Mask] | match value | prefix length | +/// | [`Range`][super::field::FieldType::Range] | range low | range high | +/// | [`Bitmask`][super::field::FieldType::Bitmask] | match value | bitmask | +/// +/// Use the [`from_u8`][AclField::from_u8], [`from_u16`][AclField::from_u16], +/// [`from_u32`][AclField::from_u32], or [`from_u64_raw`][AclField::from_u64_raw] constructors to set +/// the value and mask/range for the appropriate field width. +/// +/// # Why the union fields are private +/// +/// The `rte_acl_field_types` union is exposed via private fields so that safe +/// code cannot construct an `AclField` with a narrow union member set and +/// uninitialized upper bytes (e.g. `rte_acl_field_types { u8_: 5 }` leaves +/// bytes 1..8 undefined). 
Safe accessors read `u64_` and would observe those +/// uninit bytes, which is undefined behavior. Forcing construction through +/// [`from_u8`][AclField::from_u8] / [`from_u16`][AclField::from_u16] / +/// [`from_u32`][AclField::from_u32] / [`from_u64_raw`][AclField::from_u64_raw] (each +/// of which zeroes the full 8 bytes before writing the narrow member) +/// upholds the "all 8 bytes initialized" invariant that the union accessors +/// rely on. +/// +/// `AclField` is layout-compatible with [`rte_acl_field`][dpdk_sys::rte_acl_field] (verified by +/// the const asserts below). We keep the Rust newtype rather than re-exporting the bindgen +/// struct so that we can attach typed constructors, safe accessors, and proper `Debug` / +/// `Display` impls without leaking the `_bindgen_ty_*` union name into consumer code. +// INVARIANT (union access on AclField). +// +// Every `AclField` reachable through this crate's safe API must have its +// `value` and `mask_range` unions **fully initialized in all 8 bytes**. All +// constructors uphold this: +// +// * `Default::default()` -- explicit `u64_: 0` initializer per union +// (zeroes all 8 bytes; no `unsafe` needed). +// * `from_u8` / `from_u16` / `from_u32` -- call `Self::default()` first +// (zeroing both unions) then overwrite a narrow member. +// * `from_u64_raw` -- writes both unions with explicit `u64_` initializers. +// * `zero()` -- delegates to `Default::default()`. +// +// Given this invariant, reading any union member (including the widest, +// `u64_`) is sound: every member of `rte_acl_field_types` is an integer +// type, so any bit pattern is a valid value. Each `unsafe` block that +// reads a union member cites this anchor as its SAFETY argument so that +// removing one impl (e.g. `Debug`) does not orphan the invariant for the +// others. 
+// +// The `mem::size_of::() == 8` const-assert below is +// the load-bearing check that "writing 8 bytes" covers the whole union; +// a future bindgen change adding a non-integer member trips it. +#[repr(C)] +#[derive(Copy, Clone)] +pub struct AclField { + /// The match value (or range lower bound). Private -- see the type-level + /// doc for why, and the INVARIANT comment above for the union-access + /// soundness argument. + value: dpdk_sys::rte_acl_field_types, + /// The mask, bitmask, or range upper bound (interpretation depends on the + /// field type). Private -- see the type-level doc and the INVARIANT + /// comment above. + mask_range: dpdk_sys::rte_acl_field_types, +} + +// Compile-time layout assertions against the raw DPDK type. +// +// The union-accessor soundness argument (every constructor writes 8 bytes; +// every union member is an integer type) depends on the union being exactly +// 8 bytes wide. We assert that directly so a future bindgen change that +// adds, e.g., a `__m128` member trips here rather than silently making the +// safe accessors unsound. +const _: () = { + assert!( + mem::size_of::() == 8, + "rte_acl_field_types union must be exactly 8 bytes for the \ + 'all 8 bytes initialized' invariant on AclField accessors" + ); + assert!( + mem::size_of::() == mem::size_of::(), + "AclField size must match rte_acl_field" + ); + assert!( + mem::align_of::() == mem::align_of::(), + "AclField alignment must match rte_acl_field" + ); +}; + +impl Default for AclField { + /// Returns a zero-initialized field. + /// + /// For [`Mask`][super::field::FieldType::Mask]-type fields, this is a wildcard that matches + /// any input (value `0` with mask `0`). + fn default() -> Self { + // Explicit per-union initialization through the `u64_` member + // zeroes all 8 bytes of each union without going through + // `mem::zeroed`. This is safe (no `unsafe` needed) and upholds + // the same "all 8 bytes initialised" invariant the union + // accessors rely on. 
+ Self { + value: dpdk_sys::rte_acl_field_types { u64_: 0 }, + mask_range: dpdk_sys::rte_acl_field_types { u64_: 0 }, + } + } +} + +impl fmt::Debug for AclField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: see the INVARIANT (union access on AclField) block above + // the struct definition. Every constructor leaves all 8 bytes of + // each union initialized; reading `u64_` is defined behavior. + let (value, mask) = unsafe { (self.value.u64_, self.mask_range.u64_) }; + f.debug_struct("AclField") + .field("value", &format_args!("{value:#018x}")) + .field("mask_range", &format_args!("{mask:#018x}")) + .finish() + } +} + +impl fmt::Display for AclField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: see the INVARIANT (union access on AclField) block above + // the struct definition. + // + // Format choice: labeled `value=... mask_range=...` instead of + // `value/mask_range`. The latter reads like a CIDR prefix + // (`addr/len`), but `mask_range` for Mask-typed fields actually IS + // a prefix length while for Bitmask/Range it's a bitmask or upper + // bound -- the slash form would mislead in two of three cases. + let (value, mask) = unsafe { (self.value.u64_, self.mask_range.u64_) }; + write!(f, "value={value:#018x} mask_range={mask:#018x}") + } +} + +impl PartialEq for AclField { + fn eq(&self, other: &Self) -> bool { + // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition. + unsafe { + self.value.u64_ == other.value.u64_ && self.mask_range.u64_ == other.mask_range.u64_ + } + } +} + +// `Eq` cannot be derived because the underlying bindgen union does not implement `Eq`. +// Manual impl is sound because `PartialEq` is reflexive for the integer-typed union members. +impl Eq for AclField {} + +impl core::hash::Hash for AclField { + fn hash(&self, state: &mut H) { + // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition. 
+ let (value, mask) = unsafe { (self.value.u64_, self.mask_range.u64_) }; + value.hash(state); + mask.hash(state); + } +} + +impl AclField { + /// Create a field from `u8` value and mask/range. + /// + /// Use this for fields declared with [`FieldSize::One`][super::field::FieldSize::One]. + /// + /// The upper bytes of the underlying union are zeroed. + #[must_use] + pub fn from_u8(value: u8, mask_range: u8) -> Self { + // Zero-initialize first so that the upper bytes are deterministic. + let mut field = Self::default(); + field.value.u8_ = value; + field.mask_range.u8_ = mask_range; + field + } + + /// Create a field from `u16` value and mask/range. + /// + /// Use this for fields declared with [`FieldSize::Two`][super::field::FieldSize::Two]. + /// + /// The upper bytes of the underlying union are zeroed. + #[must_use] + pub fn from_u16(value: u16, mask_range: u16) -> Self { + let mut field = Self::default(); + field.value.u16_ = value; + field.mask_range.u16_ = mask_range; + field + } + + /// Create a field from `u32` value and mask/range. + /// + /// Use this for fields declared with [`FieldSize::Four`][super::field::FieldSize::Four]. + /// + /// The upper bytes of the underlying union are zeroed. + #[must_use] + pub fn from_u32(value: u32, mask_range: u32) -> Self { + let mut field = Self::default(); + field.value.u32_ = value; + field.mask_range.u32_ = mask_range; + field + } + + /// Create a field from a raw `u64` value and mask/range, writing all + /// 8 bytes of each union member directly. + /// + /// The wrapper's [`FieldSize`][super::field::FieldSize] caps at 4 + /// bytes, so bits above the declared `size_bytes * 8` are ignored by + /// DPDK at classify time and will be rejected by + /// [`Rule::validate`] / + /// [`add_rules`][super::context::AclContext::add_rules] when + /// invariant-checking against the + /// [`AclBuildConfig`][super::config::AclBuildConfig]. 
Prefer + /// [`from_u8`][AclField::from_u8] / [`from_u16`][AclField::from_u16] / + /// [`from_u32`][AclField::from_u32] for normal use; this constructor + /// exists for explicit bit-pattern composition (e.g. test fixtures + /// or low-level data interop). + #[must_use] + pub fn from_u64_raw(value: u64, mask_range: u64) -> Self { + Self { + value: dpdk_sys::rte_acl_field_types { u64_: value }, + mask_range: dpdk_sys::rte_acl_field_types { u64_: mask_range }, + } + } + + /// Create a fully-zeroed field -- value `0` with mask/range `0`. + /// + /// Equivalent to [`AclField::default()`]. + /// + /// # Important: this is **not** a universal wildcard + /// + /// Whether a zero field matches anything depends on the field's + /// [`FieldType`][super::field::FieldType] in the build config: + /// + /// - [`Mask`][super::field::FieldType::Mask] -- matches **anything** + /// (`mask_range == 0` means "prefix length 0", i.e. compare zero bits). + /// - [`Range`][super::field::FieldType::Range] -- matches **only the + /// value 0** (low and high bounds both 0). For a range wildcard use + /// [`from_u32`][AclField::from_u32]`(0, u32::MAX)` or the appropriate + /// width. + /// - [`Bitmask`][super::field::FieldType::Bitmask] -- matches anything + /// (predicate is `(input & 0) == 0`, which is trivially true), but + /// you almost always want a non-zero mask in practice; reach for an + /// explicit constructor instead. + #[must_use] + pub fn zero() -> Self { + Self::default() + } + + /// Read the value as `u8`. + /// + /// Reading any integer-typed union member is sound for any [`AclField`] + /// constructed through this crate's public API. The caller should still + /// ensure the field was constructed via [`from_u8`][AclField::from_u8] or + /// that the `u8` interpretation is meaningful in context; otherwise the + /// returned value is the low byte of whatever wider member was stored. 
+ #[must_use] + pub fn value_u8(&self) -> u8 { + // SAFETY: see the INVARIANT (union access on AclField) block + // above the struct definition. Every constructor leaves all 8 + // bytes of each union initialized via explicit `u64_: 0` followed + // by narrow-member writes, so reading any union member is defined + // behavior. + unsafe { self.value.u8_ } + } + + /// Read the mask/range as `u8`. + /// + /// See [`value_u8`][AclField::value_u8] for the interpretation note. + #[must_use] + pub fn mask_range_u8(&self) -> u8 { + // SAFETY: see value_u8. + unsafe { self.mask_range.u8_ } + } + + /// Read the value as `u16`. + /// + /// See [`value_u8`][AclField::value_u8] for the interpretation note. + #[must_use] + pub fn value_u16(&self) -> u16 { + // SAFETY: see value_u8. + unsafe { self.value.u16_ } + } + + /// Read the mask/range as `u16`. + /// + /// See [`value_u8`][AclField::value_u8] for the interpretation note. + #[must_use] + pub fn mask_range_u16(&self) -> u16 { + // SAFETY: see value_u8. + unsafe { self.mask_range.u16_ } + } + + /// Read the value as `u32`. + /// + /// See [`value_u8`][AclField::value_u8] for the interpretation note. + #[must_use] + pub fn value_u32(&self) -> u32 { + // SAFETY: see value_u8. + unsafe { self.value.u32_ } + } + + /// Read the mask/range as `u32`. + /// + /// See [`value_u8`][AclField::value_u8] for the interpretation note. + #[must_use] + pub fn mask_range_u32(&self) -> u32 { + // SAFETY: see value_u8. + unsafe { self.mask_range.u32_ } + } + + /// Read the value as `u64`. + #[must_use] + pub fn value_u64(&self) -> u64 { + // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition. + unsafe { self.value.u64_ } + } + + /// Read the mask/range as `u64`. + #[must_use] + pub fn mask_range_u64(&self) -> u64 { + // SAFETY: see the INVARIANT (union access on AclField) block above the struct definition. 
+ unsafe { self.mask_range.u64_ } + } +} + +// --------------------------------------------------------------------------- +// Rule +// --------------------------------------------------------------------------- + +/// A complete ACL rule with `N` fields. +/// +/// This type is `#[repr(C)]` and has the same memory layout as the struct produced by the DPDK +/// `RTE_ACL_RULE_DEF(name, N)` macro: +/// +/// ```c +/// struct name { +/// struct rte_acl_rule_data data; +/// struct rte_acl_field field[N]; +/// }; +/// ``` +/// +/// Because of this layout guarantee, a `*const Rule` can be cast to `*const rte_acl_rule` and +/// passed directly to [`rte_acl_add_rules`][dpdk_sys::rte_acl_add_rules], as long as the ACL +/// context was created with `rule_size = core::mem::size_of::>()`. +/// +/// # Const parameter `N` +/// +/// `N` is the number of fields in this rule and must match the number of +/// [`FieldDef`][super::field::FieldDef] entries in the +/// [`AclBuildConfig`][super::config::AclBuildConfig] used to build the +/// [`AclContext`][super::context::AclContext]. Using the same const generic for both the context +/// and its rules catches field-count mismatches at compile time. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct Rule { + /// Rule metadata: category mask, priority, and user data. + /// + /// Private so that constructing a `Rule` must go through + /// [`Rule::new`], which enforces the `N > 0` compile-time check. Read via + /// [`Rule::data`] / [`Rule::data_mut`]. + data: RuleData, + /// Field values (one per field definition in the ACL context). + /// + /// Private for the same reason as `data` -- see the doc above. Read via + /// [`Rule::fields`] / [`Rule::fields_mut`]. + fields: [AclField; N], +} + +impl Rule { + /// Compile-time guard: a zero-field rule has nothing to match against + /// and DPDK would reject it at build time anyway. Catch it earlier. 
+ const _CHECK_N_NONZERO: () = assert!(N > 0, "Rule requires N > 0"); + + /// Compile-time guard: `Rule` must have exactly the layout produced by + /// the C macro `RTE_ACL_RULE_DEF(_, N)`: 12 bytes of `rte_acl_rule_data` + /// plus 4 bytes of padding (to reach 8-byte alignment of `rte_acl_field`) + /// plus `N * 16` bytes of fields. This is evaluated for every concrete + /// `N` reached at runtime (forced via the let-binding in `new`). + const _CHECK_LAYOUT: () = { + let expected = mem::size_of::() + + N * mem::size_of::(); + assert!( + mem::size_of::() == expected, + "Rule layout must match RTE_ACL_RULE_DEF(_, N)" + ); + assert!( + mem::align_of::() == mem::align_of::(), + "Rule alignment must match rte_acl_rule" + ); + }; + + /// The size of this rule type in bytes, suitable for passing as `rule_size` when creating an + /// ACL context. + /// + /// This is equivalent to `core::mem::size_of::>()` but provided as a named constant + /// for clarity at call sites. + pub const RULE_SIZE: u32 = mem::size_of::() as u32; + + /// Create a new rule. + /// + /// # Arguments + /// + /// * `data` -- the rule metadata (category mask, priority, and user data). + /// * `fields` -- the field values for this rule; one entry per field definition. + #[must_use] + pub const fn new(data: RuleData, fields: [AclField; N]) -> Self { + // Force evaluation of the const checks at every instantiation of `new`. + let () = Self::_CHECK_N_NONZERO; + let () = Self::_CHECK_LAYOUT; + Self { data, fields } + } + + /// Borrow the rule metadata. + #[must_use] + pub const fn data(&self) -> &RuleData { + &self.data + } + + /// Mutable access to the rule metadata. + /// + /// Note: mutations made through this reference are not re-validated + /// until the [`Rule`] is handed to + /// [`AclContext::add_rules`][super::context::AclContext::add_rules], + /// which calls [`validate`][Rule::validate] before forwarding to + /// DPDK. Any out-of-range mutation (e.g. 
setting `category_mask` + /// bits beyond `num_categories`) is caught at that point. + #[must_use] + pub const fn data_mut(&mut self) -> &mut RuleData { + &mut self.data + } + + /// Borrow the field values. + #[must_use] + pub const fn fields(&self) -> &[AclField; N] { + &self.fields + } + + /// Mutable access to the field values. + /// + /// See [`data_mut`][Rule::data_mut] for the re-validation note -- + /// the same caveat applies: mutations are checked against the + /// build config at + /// [`add_rules`][super::context::AclContext::add_rules] time. + #[must_use] + pub const fn fields_mut(&mut self) -> &mut [AclField; N] { + &mut self.fields + } + + /// Validate this rule's field values against the layout in + /// [`AclBuildConfig`][super::config::AclBuildConfig]. + /// + /// Run before each [`add_rules`][super::context::AclContext::add_rules] + /// call by the wrapper; exposed publicly so callers can pre-flight + /// rules in test fixtures or batch validators. + /// + /// # Errors + /// + /// Returns [`InvalidRule`][super::error::InvalidRule] on the first + /// violation found. Specifically catches: + /// + /// - **Soundness-critical:** a [`FieldType::Mask`][super::field::FieldType::Mask] + /// field whose `mask_range` (prefix length) exceeds the field's bit + /// width. DPDK would compute `RTE_ACL_MASKLEN_TO_BITMASK(prefix_len, size)` + /// on this, which shifts by `>= 8 * size` -- undefined behaviour in C. + /// - A [`FieldType::Range`][super::field::FieldType::Range] field with + /// reversed low/high bounds. + /// - A `category_mask` with bits set at positions + /// `>= config.num_categories()` (DPDK would silently mask them off). + /// + /// Each field is read through the union member that **DPDK** reads + /// for that field type: + /// + /// - [`FieldType::Mask`][super::field::FieldType::Mask]: `mask_range` + /// is read via `u64_`, because DPDK feeds the entire 64-bit value + /// to `RTE_ACL_MASKLEN_TO_BITMASK`. 
Validating via the same view + /// catches big-endian narrow writes (where `from_u8(_, 1)` lands + /// at the MSB of the union and would shift by `>= 8 * size` -- UB + /// in C). On little-endian targets the `u64_` view and the + /// size-specific view agree. + /// - [`FieldType::Range`][super::field::FieldType::Range]: `value` / + /// `mask_range` are read through the size-appropriate union member + /// (`u8_` for `FieldSize::One`, `u16_` for `Two`, `u32_` for `Four`), + /// because DPDK's range-trie generator reads the bounds byte-wise + /// over `size` bytes. Garbage bits in wider union members are + /// ignored: DPDK never reads through them for a size-narrower field. + /// - [`FieldType::Bitmask`][super::field::FieldType::Bitmask]: not + /// validated here. DPDK reads the bitmask byte-wise over `size` + /// bytes and an unsatisfiable `value & !mask_range != 0` predicate + /// produces a dead rule, not UB. + pub fn validate( + &self, + config: &super::config::AclBuildConfig, + ) -> Result<(), super::error::InvalidRule> { + use super::error::InvalidRule; + use super::field::FieldType; + + // category_mask: any bit at position >= num_categories will be + // silently masked out by DPDK at build time. Reject up-front so + // the rule's intended category set is what actually gets matched. + let num_categories = config.num_categories(); + let category_mask = self.data.category_mask.get(); + let allowed_categories: u32 = if num_categories >= 32 { + u32::MAX + } else { + (1u32 << num_categories) - 1 + }; + let extra_bits = category_mask & !allowed_categories; + if extra_bits != 0 { + return Err(InvalidRule::CategoryMaskExceedsNumCategories { + category_mask, + num_categories, + extra_bits, + }); + } + + for def in config.field_defs() { + // field_index < N is guaranteed by AclBuildConfig::new. 
+ let field = &self.fields[def.field_index() as usize]; + let size_bytes = def.size() as u8; + let max_bits = u32::from(size_bytes) * 8; + + match def.field_type() { + FieldType::Mask => { + // DPDK reads `mask_range.u64` for MASK fields and + // feeds it to `RTE_ACL_MASKLEN_TO_BITMASK`, which + // shifts `(uint64_t)-1` by `8 * size - prefix_length`. + // We must validate against the same view DPDK will + // see: on big-endian, a narrow constructor like + // `from_u8(_, 1)` lands at the most-significant + // byte of the union and reading `mask_range.u64` + // yields `1 << 56`, far exceeding `max_bits` and + // making the C shift undefined. Validating via + // `mask_range_u64` rejects that input up-front + // with a clear error rather than silently passing + // a UB-triggering value to DPDK. On little-endian + // (currently the only tested target) the u64 view + // and the size-specific view agree, so this + // changes nothing for LE callers. + let prefix_length = field.mask_range_u64(); + if prefix_length > u64::from(max_bits) { + return Err(InvalidRule::PrefixLengthOutOfRange { + field_index: def.field_index(), + prefix_length, + max_bits, + }); + } + } + FieldType::Range => { + // DPDK reads RANGE bounds byte-wise over `size` + // bytes (see `acl_gen_range_trie`), so the + // size-matching union member is the right view + // for the bounds-ordering check. + let (value, mask_range): (u64, u64) = match def.size() { + super::field::FieldSize::One => ( + u64::from(field.value_u8()), + u64::from(field.mask_range_u8()), + ), + super::field::FieldSize::Two => ( + u64::from(field.value_u16()), + u64::from(field.mask_range_u16()), + ), + super::field::FieldSize::Four => ( + u64::from(field.value_u32()), + u64::from(field.mask_range_u32()), + ), + }; + if value > mask_range { + return Err(InvalidRule::RangeReversed { + field_index: def.field_index(), + low: value, + high: mask_range, + }); + } + } + FieldType::Bitmask => { + // No wrapper-side check. 
DPDK reads BITMASK + // value/mask_range byte-wise over `size` bytes + // and ignores wider bytes. A user-mistake like + // `value & !mask_range != 0` (an unsatisfiable + // bitmask predicate) is not UB; it just produces + // a dead rule. If a future lint pass surfaces + // those, it belongs in a separate diagnostic + // module, not in the soundness-critical + // validator here. + } + } + } + Ok(()) + } +} + +impl fmt::Display for Rule { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Rule<{N}> {{ {}, fields: [", self.data)?; + for (i, field) in self.fields.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{field}")?; + } + write!(f, "] }}") + } +} + +// --------------------------------------------------------------------------- +// Layout verification +// --------------------------------------------------------------------------- + +/// Compile-time pins for the DPDK struct sizes that the [`Rule`]`` layout +/// formula depends on. +/// +/// The field array in [`rte_acl_rule`][dpdk_sys::rte_acl_rule] starts at offset +/// 16 (12 bytes of `rte_acl_rule_data` + 4 bytes of padding to reach 8-byte +/// alignment of `rte_acl_field`), so the layout invariant +/// `size_of::>() == size_of::() + N * size_of::()` +/// is checked for every concrete `N` by [`Rule::_CHECK_LAYOUT`], not by spot +/// checks here. 
+const _: () = { + // rte_acl_rule_data is 12 bytes, alignment 4 + assert!(mem::size_of::() == 12); + assert!(mem::align_of::() == 4); + // rte_acl_field is 16 bytes, alignment 8 + assert!(mem::size_of::() == 16); + assert!(mem::align_of::() == 8); + // rte_acl_rule (with flexible array) is 16 bytes base, alignment 8 + assert!(mem::size_of::() == 16); + assert!(mem::align_of::() == 8); +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rule_data_display() { + let data = RuleData { + category_mask: CategoryMask::new(0x1).unwrap(), + priority: Priority::new(100).unwrap(), + userdata: 42.try_into().unwrap(), + }; + let s = format!("{data}"); + assert!(s.contains("category_mask: 0x00000001")); + assert!(s.contains("priority: 100")); + assert!(s.contains("userdata: 42")); + } + + // Tests below cross between union members (write narrow, read u64) + // and therefore observe the host's endianness. Gated to LE because: + // + // - On LE the narrow value lands in the low bytes of the union, + // so reading u64 yields the same numeric value zero-extended. + // - On BE the narrow value lands in the high bytes, so the + // numeric u64 read would be `value << (64 - 8*size)`. + // + // The wrapper supports BE for the actual data flow (input data is + // in host byte order; DPDK reads through the matching union member + // on the same host). Only the test's cross-width readback is + // endian-dependent. 
+ #[cfg(target_endian = "little")] + #[test] + fn acl_field_from_u8_zeroes_upper_bytes() { + let field = AclField::from_u8(0xAB, 0xCD); + assert_eq!(field.value_u64(), 0xAB); + assert_eq!(field.mask_range_u64(), 0xCD); + } + + #[cfg(target_endian = "little")] + #[test] + fn acl_field_from_u16_zeroes_upper_bytes() { + let field = AclField::from_u16(0xABCD, 0x1234); + assert_eq!(field.value_u64(), 0xABCD); + assert_eq!(field.mask_range_u64(), 0x1234); + } + + #[cfg(target_endian = "little")] + #[test] + fn acl_field_from_u32_zeroes_upper_bytes() { + let field = AclField::from_u32(0xDEAD_BEEF, 0xFFFF_FF00); + assert_eq!(field.value_u64(), 0xDEAD_BEEF); + assert_eq!(field.mask_range_u64(), 0xFFFF_FF00); + } + + #[test] + fn acl_field_from_u64_raw_full_range() { + let field = AclField::from_u64_raw(0x0123_4567_89AB_CDEF, 0xFEDC_BA98_7654_3210); + assert_eq!(field.value_u64(), 0x0123_4567_89AB_CDEF); + assert_eq!(field.mask_range_u64(), 0xFEDC_BA98_7654_3210); + } + + #[test] + fn acl_field_zero_is_all_zero() { + let w = AclField::zero(); + assert_eq!(w.value_u64(), 0); + assert_eq!(w.mask_range_u64(), 0); + } + + #[test] + fn acl_field_equality() { + let a = AclField::from_u32(10, 20); + let b = AclField::from_u32(10, 20); + let c = AclField::from_u32(10, 21); + assert_eq!(a, b); + assert_ne!(a, c); + } + + #[cfg(target_endian = "little")] + #[test] + fn acl_field_debug_is_hex() { + // The hex digits depend on which bytes of the u64 the narrow + // u32 write lands in -- LE-specific. See the note on + // `acl_field_from_u8_zeroes_upper_bytes`. 
+ let field = AclField::from_u32(0xFF, 0xAA); + let dbg = format!("{field:?}"); + assert!(dbg.contains("0x00000000000000ff"), "got: {dbg}"); + assert!(dbg.contains("0x00000000000000aa"), "got: {dbg}"); + } + + #[test] + fn rule_display() { + let rule: Rule<2> = Rule::new( + RuleData { + category_mask: CategoryMask::new(1).unwrap(), + priority: Priority::new(10).unwrap(), + userdata: 1.try_into().unwrap(), + }, + [AclField::from_u32(0, 0), AclField::from_u16(80, 80)], + ); + let s = format!("{rule}"); + assert!(s.starts_with("Rule<2>")); + } + + #[test] + fn rule_equality() { + let r1: Rule<1> = Rule::new( + RuleData { + category_mask: CategoryMask::new(1).unwrap(), + priority: Priority::new(1).unwrap(), + userdata: 1.try_into().unwrap(), + }, + [AclField::from_u32(100, 200)], + ); + let r2 = r1; + assert_eq!(r1, r2); + } + + #[test] + fn rule_size_constant_matches_size_of() { + assert_eq!(Rule::<1>::RULE_SIZE as usize, mem::size_of::>()); + assert_eq!(Rule::<5>::RULE_SIZE as usize, mem::size_of::>()); + assert_eq!(Rule::<10>::RULE_SIZE as usize, mem::size_of::>()); + } + + #[test] + fn priority_constants_match_dpdk() { + assert_eq!(priority::MIN, 1); + assert_eq!( + priority::MAX, + dpdk_sys::_bindgen_ty_4::RTE_ACL_MAX_PRIORITY as i32 + ); + } + + /// Property: `Priority::new` accepts exactly the closed interval + /// `[priority::MIN, priority::MAX]` and rejects everything else. 
+ #[test] + fn priority_new_validates_range() { + bolero::check!().with_type::().for_each(|value: &i32| { + let result = Priority::new(*value); + if (priority::MIN..=priority::MAX).contains(value) { + let p = result.unwrap_or_else(|_| { + panic!("Priority::new({value}) should accept in-range value") + }); + assert_eq!(p.get(), *value); + } else { + assert!( + result.is_err(), + "Priority::new({value}) should reject out-of-range value" + ); + } + }); + } +} diff --git a/dpdk/src/eal.rs b/dpdk/src/eal.rs index 65a450b94d..0e633aa1cc 100644 --- a/dpdk/src/eal.rs +++ b/dpdk/src/eal.rs @@ -8,10 +8,10 @@ use alloc::ffi::CString; use alloc::format; use alloc::string::ToString; use alloc::vec::Vec; +use core::ffi::CStr; use core::ffi::c_int; use core::fmt::{Debug, Display}; use dpdk_sys; -use std::ffi::CStr; use tracing::{error, info, warn}; /// Safe wrapper around the DPDK Environment Abstraction Layer (EAL). @@ -83,7 +83,12 @@ impl ValidatedEalArgs { ) -> Result { let args: Vec<_> = args.into_iter().map(|s| s.as_ref().to_string()).collect(); let len = args.len(); - if len > c_int::MAX as usize { + // Reserve one slot for the argv[0] placeholder that `init` prepends + // before calling rte_eal_init. Without this, len == c_int::MAX as + // usize would pass validation here and then overflow the i32 cast + // when computing argc for rte_eal_init. + const MAX_USER_ARGS: usize = (c_int::MAX as usize).saturating_sub(1); + if len > MAX_USER_ARGS { return Err(IllegalEalArguments::TooLong(len)); } match args.iter().find(|s| !s.is_ascii()) { @@ -109,7 +114,8 @@ impl ValidatedEalArgs { /// /// Panics if /// -/// 1. There are more than `c_int::MAX` arguments. +/// 1. There are more than `c_int::MAX - 1` arguments (the `-1` reserves a +/// slot for the `argv[0]` placeholder). /// 2. The arguments are not valid ASCII strings. /// 3. The EAL initialization fails. /// 4. The EAL has already been initialized. 
@@ -127,8 +133,64 @@ pub fn init(args: impl IntoIterator>) -> Eal { let mut args = ValidatedEalArgs::new(args).unwrap_or_else(|e| { Eal::fatal_error(e.to_string()); }); - let mut c_args: Vec<_> = args.0.iter_mut().map(|s| s.as_ptr().cast_mut()).collect(); + // EAL treats argv[0] as the program name and ignores it; this + // slot would otherwise eat the first real flag. We sidestep + // this by prepending a placeholder program name as the first + // owned CString. + args.0.insert(0, c"dataplane".to_owned()); + + // Move every CString into a raw `*mut c_char` via + // `CString::into_raw`. This is the only safe way to obtain a + // pointer with full mutable provenance for FFI: `as_ptr()` on + // a `CString` (or `&CString` reborrowed from `&mut CString`) + // carries SharedReadOnly provenance under Stacked / Tree + // Borrows, and any write through `as_ptr().cast_mut()` would + // be UB even though the allocation is writable. + // + // The pinned DPDK source (`rte_eal_init` + its getopt-based + // option parser) only permutes the argv **pointer array** -- + // it does not modify the bytes of any individual argv string + // and does not change any string's NUL-terminated length. + // The `CString::from_raw` cleanup below depends on that: + // `from_raw` is only sound if the string length is unchanged + // from what `into_raw` produced. + // + // We still use `into_raw` (rather than `as_ptr().cast_mut()`) + // because `rte_eal_init`'s public contract permits the EAL or + // any argument parser it calls to modify argv strings in + // place (`setproctitle`-style program-name manipulation, + // `getopt_long`-style `optarg` rewrites). Our pinned DPDK + // does not exercise that allowance, but `into_raw` gives us + // mut-clean pointer provenance regardless. 
If a future DPDK + // upgrade ever started rewriting argv strings in place, the + // round-trip here is still pointer-provenance-sound but the + // reclamation path would need to switch to a non-length- + // dependent strategy (e.g. `libc::free` on the original + // pointers, then `mem::forget` the CStrings). + // + // Reclamation note: `rte_eal_init` does getopt-style permutation + // on the argv array, so the order in `c_args` after the FFI + // call is **not** the order on entry. We snapshot the + // pre-init pointer list in `original_ptrs` to reclaim each + // CString exactly once with `CString::from_raw`, regardless + // of how DPDK reorders `c_args`. The `_reclaimed` Vec must + // drop **before** the scope exits (and therefore before the + // `RteAllocator::mark_initialized` allocator swap below) so + // the system allocator that produced each CString is the one + // that frees it. + let mut c_args: Vec<*mut core::ffi::c_char> = + args.0.drain(..).map(CString::into_raw).collect(); + let original_ptrs: Vec<*mut core::ffi::c_char> = c_args.clone(); let ret = unsafe { dpdk_sys::rte_eal_init(c_args.len() as _, c_args.as_mut_ptr() as _) }; + // SAFETY: each pointer in `original_ptrs` came from + // `CString::into_raw` above; we have not transferred ownership + // elsewhere (DPDK does not retain pointers from argv after + // `rte_eal_init` returns). Using the pre-init snapshot avoids + // aliasing if DPDK permuted `c_args`. 
+ let _reclaimed: Vec = original_ptrs + .into_iter() + .map(|p| unsafe { CString::from_raw(p) }) + .collect(); if ret < 0 { EalErrno::assert(unsafe { dpdk_sys::rte_errno_get() }); } diff --git a/dpdk/src/lib.rs b/dpdk/src/lib.rs index 4b06491c04..26ea73a048 100644 --- a/dpdk/src/lib.rs +++ b/dpdk/src/lib.rs @@ -33,6 +33,7 @@ extern crate alloc; extern crate core; +pub mod acl; pub mod dev; pub mod eal; pub mod flow; diff --git a/justfile b/justfile index 63450dc071..9b9d520dd9 100644 --- a/justfile +++ b/justfile @@ -52,17 +52,32 @@ _cargo_profile_flag := if profile == "debug" { "" } else { "--profile " + profil # filters for nextest # -# Under `shuttle`, isolate the bolero x shuttle suite (a `shuttle` -# substring matches both the test binary and the test-name -# convention). +# Under `shuttle`, the legacy `dataplane-quiescent` test layout had a +# `shuttle` binary that hosted the bolero x shuttle suite, and we used +# `--package=shuttle` (now an `-E 'package(shuttle)'`-style filter +# embedded in nextest's argv) to isolate it. Today that suite lives in +# `concurrency/tests/quiescent_shuttle.rs`, and the test binary is +# `quiescent_shuttle`; matching the substring `shuttle` is good enough. # -# Under `loom`, the legacy filter `-E 'binary(loom)'` matched the -# old `dataplane-quiescent` `loom` test binary. After absorbing -# the crate, the binary is `quiescent_loom` (and later test files -# add more); an empty filter lets nextest walk every archived -# binary. Tests that don't apply under loom are cfg-gated out and -# compile to zero entries. -filter := if features == "shuttle" { "shuttle" } else { "" } +# Under `loom`, the legacy filter `-E 'binary(loom)'` matched +# `quiescent_loom`, the single integration-test binary that opted into +# `loom::model`. 
After the concurrency rework, loom-compatible tests +# are spread across multiple binaries (`quiescent_model`, +# `thread_scope`, `arc_weak`, `stress_dispatch`); the rest are gated +# with `#![cfg(not(any(feature = "loom", ...)))]` and compile down to +# zero tests under the loom feature. An empty filter is therefore the +# right answer: nextest walks every archived binary, the cfg-gated +# ones contain no tests, and the loom-compatible ones run under their +# `#[concurrency::test]`-routed `loom::model` body. +# Match all shuttle variants (`shuttle`, `shuttle_pct`, `shuttle_dfs`). +# Under any shuttle backend, `concurrency::sync` types ARE shuttle +# primitives, and touching them outside a `shuttle::check_*`-wrapped +# body panics with `ExecutionState NotSet`. Tests that are designed +# to run under shuttle either go through `#[concurrency::test]` (which +# emits a `concurrency_model::` leaf -- the substring matches) +# or live in a `*_shuttle` module / `*shuttle*` binary by convention. +# Other workspace tests would fail spuriously without this filter. +filter := if features =~ "^shuttle" { "shuttle" } else { "" } # instrumentation mode (none/coverage) instrument := "none" diff --git a/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h b/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h index 3ebfe21e7d..a8b881dfc3 100644 --- a/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h +++ b/nix/pkgs/dpdk-wrapper/src/dpdk_wrapper.h @@ -3,6 +3,7 @@ #include +#include #include #include #include diff --git a/nix/pkgs/dpdk/default.nix b/nix/pkgs/dpdk/default.nix index 5716bc3c74..c02a0b78b5 100644 --- a/nix/pkgs/dpdk/default.nix +++ b/nix/pkgs/dpdk/default.nix @@ -47,7 +47,6 @@ stdenv.mkDerivation { mesonFlags = let disabledLibs = [ - "acl" "argparse" "bbdev" "bitratestats" @@ -86,6 +85,7 @@ stdenv.mkDerivation { "table" ]; enabledLibs = [ + "acl" "cryptodev" # required for vhost "dmadev" # required by vhost "ethdev"