From c57653202343e5889e41c4dae2e0a9b320cdb312 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Fri, 15 May 2026 11:59:08 +0200 Subject: [PATCH] =?UTF-8?q?feat(safety):=20Phase=201=20=E2=80=94=20--safet?= =?UTF-8?q?y-bounds=20umbrella=20+=20RV32=20software=20bounds=20+=20RV32?= =?UTF-8?q?=20div=20trap=20+=20safety-manifest.json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements Phase 1 of docs/binary-safety-design.md (#110). * CLI: new `--safety-bounds <none|mpu|software|mask>` flag. Keeps `--bounds-check` as a deprecated alias mapping to `--safety-bounds software`; emits a one-line deprecation notice. * CompileConfig grows a `safety_bounds: SafetyBounds` field; the ARM and RISC-V backends both read it. * Plumb through `BoundsCheckConfig` (now `SafetyBounds` at the public boundary). Existing `None` / `Software` semantics preserved on ARM. * RV32 selector: ported the software bounds-check sequence (`bgeu addr, mem_size, trap_label; ...; trap_label: ebreak`) to every i32 load/store and sub-word variant. * RV32 selector: emit `bne rs2, x0, ok; ebreak; ok: div` (and `divu`, `rem`, `remu`) trap-on-zero guards. Mirrors the existing ARM pattern (instruction_selector.rs:3895). * `safety-manifest.json` written alongside the output ELF whenever the resolved bounds mode is not `none`. Schema: synth_version, target_triple, safety_bounds, safety_div_zero, safety_div_overflow, linear_memory_bytes. ## Hand-tested * `synth compile examples/wat/simple_add.wat --safety-bounds mpu -o /tmp/sa.elf --cortex-m` succeeds; `/tmp/sa.safety-manifest.json` contains the expected fields. * `synth compile examples/wat/simple_add.wat --bounds-check -o /tmp/sa.elf --cortex-m` prints the deprecation notice and still produces a working ELF. * `synth compile /tmp/rv_div.wat --backend riscv --target riscv32imac --safety-bounds software --all-exports -o /tmp/rv_div.elf` succeeds; RV32 ELF contains the div-by-zero trap sequence; manifest emitted. 
## Out of scope (later phases per docs/binary-safety-design.md §6) * Phase 2: stack overflow (MSPLIM/PSPLIM/canary), CFI BTI/PAC * Phase 3: linear-memory poisoning * Phase 4: component-model type confusion * Phase 5: parameterise Compilation.v by SafetyProfile in Rocq ## Verification * `cargo test --workspace --exclude synth-verify` — all pass (z3-sys download blocked in this environment; synth-verify excluded accordingly, CI will exercise it). * `cargo clippy --workspace --exclude synth-verify --all-targets -- -D warnings` — clean. * `cargo fmt --check` — clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/synth-backend-riscv/src/backend.rs | 176 +++++- crates/synth-backend-riscv/src/lib.rs | 5 +- crates/synth-backend-riscv/src/selector.rs | 511 +++++++++++++++++- crates/synth-backend/src/arm_backend.rs | 73 ++- crates/synth-cli/src/main.rs | 104 +++- crates/synth-core/src/backend.rs | 100 +++- crates/synth-core/src/lib.rs | 2 + crates/synth-core/src/safety_manifest.rs | 173 ++++++ .../src/instruction_selector.rs | 35 +- 9 files changed, 1120 insertions(+), 59 deletions(-) create mode 100644 crates/synth-core/src/safety_manifest.rs diff --git a/crates/synth-backend-riscv/src/backend.rs b/crates/synth-backend-riscv/src/backend.rs index ad6e5cb..a0dbcbc 100644 --- a/crates/synth-backend-riscv/src/backend.rs +++ b/crates/synth-backend-riscv/src/backend.rs @@ -6,9 +6,10 @@ //! Track B2/B3/B4 deliverable. 
use crate::elf_builder::{RiscVElfBuilder, RiscVElfFunction}; -use crate::selector::select_simple; +use crate::selector::{RvBoundsMode, SelectorOptions, select_with_options}; use synth_core::backend::{ Backend, BackendCapabilities, BackendError, CompilationResult, CompileConfig, CompiledFunction, + SafetyBounds, }; use synth_core::target::{ArchFamily, IsaVariant, TargetSpec}; use synth_core::wasm_decoder::DecodedModule; @@ -65,14 +66,23 @@ impl Backend for RiscVBackend { )); } + // Resolve the bounds-check mode once, using the module's first memory + // (if any) to populate the size for Software/Mask modes. + let mem_size = module + .memories + .first() + .map(|m| m.initial_bytes()) + .unwrap_or(0); + let opts = build_options(config, mem_size)?; + let mut functions = Vec::new(); let mut elf_funcs = Vec::new(); for func in &exports { let name = func.export_name.clone().unwrap(); - let compiled = self.compile_function(&name, &func.ops, config)?; + let compiled = compile_function_with_opts(&name, &func.ops, config, opts)?; elf_funcs.push(RiscVElfFunction { name: compiled.name.clone(), - ops: compile_to_riscv_ops(&func.ops, &compiled)?, + ops: compile_to_riscv_ops(&func.ops, opts, &compiled)?, }); functions.push(compiled); } @@ -96,26 +106,11 @@ impl Backend for RiscVBackend { config: &CompileConfig, ) -> Result { ensure_supported_target(&config.target)?; - - let num_params = count_params(ops); - let selection = select_simple(ops, num_params) - .map_err(|e| BackendError::CompilationFailed(format!("RISC-V selector: {e}")))?; - - // Encode the function via the ELF builder's per-function pipeline so - // we benefit from label resolution. We discard the ELF and keep the - // raw bytes — that's what `CompiledFunction` carries. 
- let elf_func = RiscVElfFunction { - name: name.to_string(), - ops: selection.ops, - }; - let bytes = encode_function_bytes(&elf_func)?; - - Ok(CompiledFunction { - name: name.to_string(), - code: bytes, - wasm_ops: ops.to_vec(), - relocations: Vec::new(), - }) + // No module context — default the memory size to 1 wasm page so that + // Software/Mask modes can still synthesise the guard. Callers that + // need a different size go through `compile_module`. + let opts = build_options(config, 64 * 1024)?; + compile_function_with_opts(name, ops, config, opts) } fn is_available(&self) -> bool { @@ -123,6 +118,60 @@ impl Backend for RiscVBackend { } } +/// Build `SelectorOptions` from the `CompileConfig`'s safety knobs. +/// +/// Errors out (compile-time) when the user picks `SafetyBounds::Mask` with a +/// non-power-of-two memory size, per the design doc §3.1 path C. +fn build_options(config: &CompileConfig, mem_size: u32) -> Result { + let bounds = match config.effective_safety_bounds() { + SafetyBounds::None => RvBoundsMode::None, + SafetyBounds::Mpu => RvBoundsMode::Pmp, + SafetyBounds::Software => RvBoundsMode::Software { mem_size }, + SafetyBounds::Mask => { + if mem_size == 0 || !mem_size.is_power_of_two() { + return Err(BackendError::UnsupportedConfig(format!( + "--safety-bounds mask requires a power-of-two linear-memory size, got {} bytes — switch to --safety-bounds software for the deterministic check", + mem_size + ))); + } + RvBoundsMode::Mask { mask: mem_size - 1 } + } + }; + Ok(SelectorOptions { + bounds, + signed_div_overflow_trap: true, + }) +} + +fn compile_function_with_opts( + name: &str, + ops: &[WasmOp], + config: &CompileConfig, + opts: SelectorOptions, +) -> Result { + ensure_supported_target(&config.target)?; + + let num_params = count_params(ops); + let selection = select_with_options(ops, num_params, opts) + .map_err(|e| BackendError::CompilationFailed(format!("RISC-V selector: {e}")))?; + + // Encode the function via the ELF builder's 
per-function pipeline so + // we benefit from label resolution. We discard the ELF and keep the + // raw bytes — that's what `CompiledFunction` carries. + let elf_func = RiscVElfFunction { + name: name.to_string(), + ops: selection.ops, + }; + let bytes = encode_function_bytes(&elf_func)?; + + Ok(CompiledFunction { + name: name.to_string(), + code: bytes, + wasm_ops: ops.to_vec(), + relocations: Vec::new(), + }) +} + fn ensure_supported_target(target: &TargetSpec) -> Result<(), BackendError> { if !matches!(target.family, ArchFamily::RiscV) { return Err(BackendError::UnsupportedConfig(format!( @@ -209,10 +258,11 @@ fn encode_function_bytes(f: &RiscVElfFunction) -> Result, BackendError> /// arch-neutral, so this re-derives it on demand for ELF emission. fn compile_to_riscv_ops( ops: &[WasmOp], + opts: SelectorOptions, _compiled: &CompiledFunction, ) -> Result, BackendError> { let num_params = count_params(ops); - let selection = select_simple(ops, num_params) + let selection = select_with_options(ops, num_params, opts) .map_err(|e| BackendError::CompilationFailed(format!("RISC-V selector: {e}")))?; Ok(selection.ops) } @@ -317,7 +367,7 @@ mod tests { wasm_ops: ops.clone(), relocations: Vec::new(), }; - let rv = compile_to_riscv_ops(&ops, &dummy).unwrap(); + let rv = compile_to_riscv_ops(&ops, SelectorOptions::wasm_compliant(), &dummy).unwrap(); // First two ops should move param regs into temporaries (immediate 0). 
assert!(matches!( rv[0], @@ -347,4 +397,80 @@ mod tests { } )); } + + // ─── Phase 1 safety-bounds plumbing ──────────────────────────────── + + #[test] + fn build_options_default_is_none() { + let cfg = CompileConfig::default(); + let opts = build_options(&cfg, 65536).unwrap(); + assert!(matches!(opts.bounds, RvBoundsMode::None)); + assert!(opts.signed_div_overflow_trap); + } + + #[test] + fn build_options_legacy_bounds_check_promotes_to_software() { + let cfg = CompileConfig { + bounds_check: true, + ..Default::default() + }; + let opts = build_options(&cfg, 65536).unwrap(); + assert!(matches!( + opts.bounds, + RvBoundsMode::Software { mem_size: 65536 } + )); + } + + #[test] + fn build_options_safety_bounds_mpu_maps_to_pmp() { + let cfg = CompileConfig { + safety_bounds: SafetyBounds::Mpu, + ..Default::default() + }; + let opts = build_options(&cfg, 65536).unwrap(); + assert!(matches!(opts.bounds, RvBoundsMode::Pmp)); + } + + #[test] + fn build_options_mask_requires_power_of_two() { + let cfg = CompileConfig { + safety_bounds: SafetyBounds::Mask, + ..Default::default() + }; + // 3000 bytes is not a power of two — should error. + let err = build_options(&cfg, 3000).unwrap_err(); + assert!(matches!(err, BackendError::UnsupportedConfig(_))); + + // 65536 bytes is a power of two — should succeed. + let opts = build_options(&cfg, 65536).unwrap(); + assert!(matches!(opts.bounds, RvBoundsMode::Mask { mask: 0xFFFF })); + } + + #[test] + fn compile_with_software_bounds_emits_bgeu_in_function_bytes() { + // i32.load + safety-bounds software should produce a 32-bit instruction + // whose lowest 7 bits == 0b1100011 (BRANCH opcode). 
+ let b = RiscVBackend::new(); + let cfg = CompileConfig { + target: TargetSpec::riscv32imac(), + safety_bounds: SafetyBounds::Software, + ..Default::default() + }; + let ops = vec![ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + WasmOp::End, + ]; + let f = b.compile_function("ldsafe", &ops, &cfg).unwrap(); + // Look for at least one branch-opcode (0x63) instruction in the bytes. + let has_branch_opcode = f.code.chunks_exact(4).any(|w| (w[0] & 0x7F) == 0x63); + assert!( + has_branch_opcode, + "expected at least one BRANCH-opcode (0x63) word in: {:?}", + f.code + ); + } } diff --git a/crates/synth-backend-riscv/src/lib.rs b/crates/synth-backend-riscv/src/lib.rs index d0de09e..d228e90 100644 --- a/crates/synth-backend-riscv/src/lib.rs +++ b/crates/synth-backend-riscv/src/lib.rs @@ -31,5 +31,8 @@ pub use linker_script::{LinkerScriptConfig, RiscVLinkerScriptGenerator}; pub use pmp::{PMPAllocator, PMPEntry, PMPError, PMPMode, PMPPermissions}; pub use register::{Reg, RegClass}; pub use riscv_op::{Branch, Csr, RiscVOp}; -pub use selector::{RiscVSelection, SelectorError, select_simple}; +pub use selector::{ + RiscVSelection, RvBoundsMode, SelectorError, SelectorOptions, select_simple, + select_with_options, +}; pub use startup::{RiscVStartupGenerator, StartupConfig}; diff --git a/crates/synth-backend-riscv/src/selector.rs b/crates/synth-backend-riscv/src/selector.rs index 7541629..3eebc63 100644 --- a/crates/synth-backend-riscv/src/selector.rs +++ b/crates/synth-backend-riscv/src/selector.rs @@ -21,6 +21,57 @@ use crate::riscv_op::{Branch, RiscVOp}; use synth_core::wasm_op::WasmOp; use thiserror::Error; +/// Per-access memory-bounds policy for the RV32 selector. Mirrors the ARM +/// `BoundsCheckConfig` in spirit — see `docs/binary-safety-design.md` §3.1. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum RvBoundsMode { + /// No inline check (default; relies on PMP or trusted-module assumption). 
+ #[default] + None, + /// Hardware PMP enforcement. Same code-gen as `None` but distinguished + /// so the safety-manifest can record the intent. + Pmp, + /// Software check: `bgeu addr, mem_size, Ltrap; ...; Ltrap: ebreak` + /// emitted before each load/store. The memory size is materialised + /// inline (`emit_load_imm`) per access — Phase 1 acceptably simple; + /// Phase 2 will reserve a callee-saved register for it. + Software { + /// Linear-memory size in bytes. Anything `addr + offset + access_size` + /// `>=` this triggers the trap. + mem_size: u32, + }, + /// AND-mask: `andi addr, addr, (mem_size - 1)` — power-of-two only. + /// Wraps on OOB rather than trapping; matches the ARM "Masking" mode. + Mask { + /// Must be `mem_size - 1`, where `mem_size` is a power of two. + mask: u32, + }, +} + +/// Options for RV32 instruction selection. Phase 1 of the binary-safety +/// roadmap surfaces a small set of safety knobs here; later phases will +/// likely group them into a `SafetyProfile` struct shared between backends. +#[derive(Debug, Clone, Copy, Default)] +pub struct SelectorOptions { + /// Memory-bounds policy. See `RvBoundsMode`. + pub bounds: RvBoundsMode, + /// Emit the `INT_MIN / -1` overflow guard around signed-division ops. + /// Always on for spec-compliant WASM output; exposed as a knob so the + /// fuzz harness can disable it when comparing IR-level behaviour only. + pub signed_div_overflow_trap: bool, +} + +impl SelectorOptions { + /// Construct an options bundle with both safety knobs at their compliant + /// defaults (signed-div overflow trap on, bounds-check off). 
+ pub fn wasm_compliant() -> Self { + Self { + bounds: RvBoundsMode::None, + signed_div_overflow_trap: true, + } + } +} + #[derive(Debug, Error)] pub enum SelectorError { #[error("unsupported wasm op for RV32 skeleton: {0:?}")] @@ -67,10 +118,7 @@ const LINEAR_MEM_BASE: Reg = Reg::S11; /// - control: block, loop, if, else, end, br, br_if, return /// - misc: drop, nop, unreachable pub fn select(wasm_ops: &[WasmOp], num_params: u32) -> Result { - let mut ctx = Selector::new(num_params); - ctx.lower_seq(wasm_ops)?; - ctx.emit_return_epilogue(); - Ok(RiscVSelection { ops: ctx.out }) + select_with_options(wasm_ops, num_params, SelectorOptions::wasm_compliant()) } /// Backwards-compatible alias for the original simple selector. The new @@ -83,6 +131,19 @@ pub fn select_simple( select(wasm_ops, num_params) } +/// Same as [`select`], but lets the caller dial in safety options. +/// Phase 1 of `docs/binary-safety-design.md` §3.1 / §3.3. +pub fn select_with_options( + wasm_ops: &[WasmOp], + num_params: u32, + options: SelectorOptions, +) -> Result { + let mut ctx = Selector::new_with_options(num_params, options); + ctx.lower_seq(wasm_ops)?; + ctx.emit_return_epilogue(); + Ok(RiscVSelection { ops: ctx.out }) +} + /// Internal control-flow frame. Every wasm `block`/`loop`/`if` pushes one. struct ControlFrame { /// What kind of frame it is — affects br semantics (loop targets the @@ -125,10 +186,12 @@ struct Selector { next_label: u32, /// Tracks whether we already emitted the function-final return. emitted_return: bool, + /// Phase-1 safety options (bounds-check policy, signed-div overflow trap). + options: SelectorOptions, } impl Selector { - fn new(num_params: u32) -> Self { + fn new_with_options(num_params: u32, options: SelectorOptions) -> Self { // a0..a(min(7,num_params-1)) hold params, t0..t6 + s1..s10 are // available as temporaries. Note we deliberately do NOT use s0 (fp) // or s11 (linear-memory base) — they're reserved for the runtime. 
@@ -156,6 +219,7 @@ impl Selector { next_temp: 0, next_label: 0, emitted_return: false, + options, } } @@ -216,11 +280,15 @@ impl Selector { I32Add => self.bin(op, |rd, rs1, rs2| RiscVOp::Add { rd, rs1, rs2 })?, I32Sub => self.bin(op, |rd, rs1, rs2| RiscVOp::Sub { rd, rs1, rs2 })?, I32Mul => self.bin(op, |rd, rs1, rs2| RiscVOp::Mul { rd, rs1, rs2 })?, - I32DivS => self.bin_with_zero_trap(op, |rd, rs1, rs2| RiscVOp::Div { rd, rs1, rs2 })?, + I32DivS => { + self.bin_with_signed_div_traps(op, |rd, rs1, rs2| RiscVOp::Div { rd, rs1, rs2 })? + } I32DivU => { self.bin_with_zero_trap(op, |rd, rs1, rs2| RiscVOp::Divu { rd, rs1, rs2 })? } - I32RemS => self.bin_with_zero_trap(op, |rd, rs1, rs2| RiscVOp::Rem { rd, rs1, rs2 })?, + I32RemS => { + self.bin_with_signed_div_traps(op, |rd, rs1, rs2| RiscVOp::Rem { rd, rs1, rs2 })? + } I32RemU => { self.bin_with_zero_trap(op, |rd, rs1, rs2| RiscVOp::Remu { rd, rs1, rs2 })? } @@ -408,6 +476,77 @@ impl Selector { Ok(()) } + /// Variant of `bin_with_zero_trap` that also guards `INT_MIN / -1`, which + /// `div`/`rem` would silently return as `INT_MIN` / `0` respectively — WASM + /// semantics require a trap. See `docs/binary-safety-design.md` §3.3. 
+ /// + /// Sequence: + /// ```text + /// bne rs2, zero, .Ldiv_ok ; divide-by-zero guard (existing) + /// ebreak + /// .Ldiv_ok: + /// ; signed-overflow guard (only when options.signed_div_overflow_trap) + /// lui tmin, %hi(INT_MIN) + /// addi tmin, tmin, %lo(INT_MIN) ; t = 0x80000000 + /// bne rs1, tmin, .Lsdiv_ok + /// li tneg1, -1 + /// bne rs2, tneg1, .Lsdiv_ok + /// ebreak + /// .Lsdiv_ok: + /// div rd, rs1, rs2 + /// ``` + fn bin_with_signed_div_traps(&mut self, op: &WasmOp, build: F) -> Result<(), SelectorError> + where + F: FnOnce(Reg, Reg, Reg) -> RiscVOp, + { + let (rs1, rs2) = self.pop_pair(op)?; + let rd = self.alloc_temp(); + // Trap 1: divisor == 0 + let ok_zero = self.fresh_label("Ldiv_ok"); + self.out.push(RiscVOp::Branch { + cond: Branch::Ne, + rs1: rs2, + rs2: Reg::ZERO, + label: ok_zero.clone(), + }); + self.out.push(RiscVOp::Ebreak); + self.out.push(RiscVOp::Label { name: ok_zero }); + + // Trap 2: INT_MIN / -1 + if self.options.signed_div_overflow_trap { + let ok_overflow = self.fresh_label("Lsdiv_ok"); + let tmin = self.alloc_temp(); + // INT_MIN = 0x80000000. emit_load_imm decomposes into lui+addi. 
+ emit_load_imm(&mut self.out, tmin, i32::MIN); + // bne rs1, tmin, .Lsdiv_ok → dividend != INT_MIN, no overflow + self.out.push(RiscVOp::Branch { + cond: Branch::Ne, + rs1, + rs2: tmin, + label: ok_overflow.clone(), + }); + let neg1 = self.alloc_temp(); + self.out.push(RiscVOp::Addi { + rd: neg1, + rs1: Reg::ZERO, + imm: -1, + }); + // bne rs2, -1, .Lsdiv_ok → divisor != -1, no overflow + self.out.push(RiscVOp::Branch { + cond: Branch::Ne, + rs1: rs2, + rs2: neg1, + label: ok_overflow.clone(), + }); + self.out.push(RiscVOp::Ebreak); + self.out.push(RiscVOp::Label { name: ok_overflow }); + } + + self.out.push(build(rd, rs1, rs2)); + self.push_val(rd); + Ok(()) + } + // ────────── Comparisons ────────── fn lower_eqz(&mut self, op: &WasmOp) -> Result<(), SelectorError> { @@ -504,8 +643,109 @@ impl Selector { // ────────── Memory ────────── + /// Emit the per-access safety guard before a load/store, returning a + /// (possibly rewritten) address register. The returned register is what + /// later code uses as the effective wasm-side address. + /// + /// Behaviour by mode: + /// - `None` / `Pmp`: pass-through (no instructions emitted; PMP handles + /// faults via hardware). + /// - `Software { mem_size }`: emit + /// `addi guard, addr, +(offset + access_size - 1)` + /// `lui/addi mlim, mem_size` + /// `bgeu guard, mlim, Ltrap` + /// `j Lok ; Ltrap: ebreak ; Lok:` + /// The check guards the last byte of the access, matching the ARM + /// software-bounds-check from §3.1. + /// - `Mask { mask }`: emit `andi rd, addr, mask` (when `mask` fits in 12 bits) + /// or `lui/addi mtmp, mask; and rd, addr, mtmp` otherwise. The result is + /// the masked address; the caller uses *that* for the subsequent access. 
+ fn emit_bounds_check( + &mut self, + addr: Reg, + offset: u32, + access_size: u32, + ) -> Result { + match self.options.bounds { + RvBoundsMode::None | RvBoundsMode::Pmp => Ok(addr), + RvBoundsMode::Software { mem_size } => { + let end_byte = offset.checked_add(access_size.saturating_sub(1)).ok_or( + SelectorError::ImmediateTooLarge { + value: offset as i64 + access_size as i64, + context: "bounds-check end byte", + }, + )?; + let guard = self.alloc_temp(); + if end_byte == 0 { + self.out.push(RiscVOp::Addi { + rd: guard, + rs1: addr, + imm: 0, + }); + } else if end_byte < 2048 { + self.out.push(RiscVOp::Addi { + rd: guard, + rs1: addr, + imm: end_byte as i32, + }); + } else { + // Materialise the offset into a temporary, then add. + let off_tmp = self.alloc_temp(); + emit_load_imm(&mut self.out, off_tmp, end_byte as i32); + self.out.push(RiscVOp::Add { + rd: guard, + rs1: addr, + rs2: off_tmp, + }); + } + let mlim = self.alloc_temp(); + emit_load_imm(&mut self.out, mlim, mem_size as i32); + let ok_label = self.fresh_label("Lbnd_ok"); + let trap_label = self.fresh_label("Lbnd_trap"); + // bgeu guard, mlim, Lbnd_trap → branch to trap when OOB + self.out.push(RiscVOp::Branch { + cond: Branch::Geu, + rs1: guard, + rs2: mlim, + label: trap_label.clone(), + }); + // happy path falls through to the load/store; insert an + // unconditional jump past the trap so we can place the + // ebreak at the end of the sequence. 
+ self.out.push(RiscVOp::Jal { + rd: Reg::ZERO, + label: ok_label.clone(), + }); + self.out.push(RiscVOp::Label { name: trap_label }); + self.out.push(RiscVOp::Ebreak); + self.out.push(RiscVOp::Label { name: ok_label }); + Ok(addr) + } + RvBoundsMode::Mask { mask } => { + let masked = self.alloc_temp(); + if mask <= 0x7FF { + self.out.push(RiscVOp::Andi { + rd: masked, + rs1: addr, + imm: mask as i32, + }); + } else { + let mtmp = self.alloc_temp(); + emit_load_imm(&mut self.out, mtmp, mask as i32); + self.out.push(RiscVOp::And { + rd: masked, + rs1: addr, + rs2: mtmp, + }); + } + Ok(masked) + } + } + } + fn lower_load_word(&mut self, op: &WasmOp, offset: u32) -> Result<(), SelectorError> { let addr = self.pop_val(op)?; + let addr = self.emit_bounds_check(addr, offset, 4)?; let dst = self.alloc_temp(); // tmp = base + addr let tmp = self.alloc_temp(); @@ -531,6 +771,11 @@ impl Selector { kind: LoadKind, ) -> Result<(), SelectorError> { let addr = self.pop_val(op)?; + let access_size = match kind { + LoadKind::I8S | LoadKind::I8U => 1, + LoadKind::I16S | LoadKind::I16U => 2, + }; + let addr = self.emit_bounds_check(addr, offset, access_size)?; let dst = self.alloc_temp(); let tmp = self.alloc_temp(); self.out.push(RiscVOp::Add { @@ -574,6 +819,12 @@ impl Selector { ) -> Result<(), SelectorError> { let value = self.pop_val(op)?; let addr = self.pop_val(op)?; + let access_size = match kind { + StoreKind::Byte => 1, + StoreKind::Half => 2, + StoreKind::Word => 4, + }; + let addr = self.emit_bounds_check(addr, offset, access_size)?; let tmp = self.alloc_temp(); self.out.push(RiscVOp::Add { rd: tmp, @@ -1259,4 +1510,250 @@ mod tests { ); assert!(matches!(r, Err(SelectorError::ImmediateTooLarge { .. 
}))); } + + // ─── Phase 1 binary-safety tests ──────────────────────────────────── + + fn s_with_opts(ops: &[WasmOp], num_params: u32, o: SelectorOptions) -> Vec { + select_with_options(ops, num_params, o).unwrap().ops + } + + #[test] + fn rv32_software_bounds_emits_bgeu_and_ebreak() { + let opts = SelectorOptions { + bounds: RvBoundsMode::Software { mem_size: 0x10000 }, + signed_div_overflow_trap: true, + }; + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + WasmOp::End, + ], + 1, + opts, + ); + // Expect at least one BGEU against the memsize and one ebreak in the + // trap basic block. + assert!( + count(&out, |op| matches!( + op, + RiscVOp::Branch { + cond: Branch::Geu, + .. + } + )) >= 1, + "expected at least one bgeu for the bounds check, got: {:?}", + out + ); + assert!( + count(&out, |op| matches!(op, RiscVOp::Ebreak)) >= 1, + "expected an ebreak in the trap path" + ); + } + + #[test] + fn rv32_no_bounds_check_emits_no_bgeu() { + let opts = SelectorOptions::wasm_compliant(); + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + WasmOp::End, + ], + 1, + opts, + ); + // No bgeu should be emitted when the bounds mode is `None`. + assert!( + count(&out, |op| matches!( + op, + RiscVOp::Branch { + cond: Branch::Geu, + .. + } + )) == 0, + "no bounds-check bgeu expected when mode is None, got: {:?}", + out + ); + } + + #[test] + fn rv32_pmp_mode_emits_no_inline_check() { + let opts = SelectorOptions { + bounds: RvBoundsMode::Pmp, + signed_div_overflow_trap: true, + }; + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + WasmOp::End, + ], + 1, + opts, + ); + // PMP mode behaves the same as None in code-gen — hardware handles it. + assert!( + count(&out, |op| matches!( + op, + RiscVOp::Branch { + cond: Branch::Geu, + .. 
+ } + )) == 0 + ); + } + + #[test] + fn rv32_mask_mode_emits_andi() { + // mask = 65535 (= 0x10000 - 1). 0xFFFF > 0x7FF so emit_load_imm + and. + let opts = SelectorOptions { + bounds: RvBoundsMode::Mask { mask: 0xFFFF }, + signed_div_overflow_trap: true, + }; + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + WasmOp::End, + ], + 1, + opts, + ); + // Either Andi (small mask) or And (large mask) appears for the mask op. + assert!( + count(&out, |op| matches!( + op, + RiscVOp::And { .. } | RiscVOp::Andi { .. } + )) >= 1 + ); + } + + #[test] + fn rv32_signed_div_emits_overflow_guard() { + let opts = SelectorOptions::wasm_compliant(); + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::LocalGet(1), + WasmOp::I32DivS, + WasmOp::End, + ], + 2, + opts, + ); + // Expect: bne rs2,zero (zero-divisor guard) AND bne rs1,INT_MIN (overflow) + // AND bne rs2,-1 (overflow). So at least 3 BNE-shaped branches. + let bne_count = count(&out, |op| { + matches!( + op, + RiscVOp::Branch { + cond: Branch::Ne, + .. + } + ) + }); + assert!( + bne_count >= 3, + "expected at least 3 BNEs (zero + INT_MIN + -1 guards), got {} in: {:?}", + bne_count, + out + ); + // And two ebreaks: one for div-by-zero, one for the overflow trap. + assert!(count(&out, |op| matches!(op, RiscVOp::Ebreak)) >= 2); + } + + #[test] + fn rv32_signed_div_overflow_trap_disabled_only_emits_zero_guard() { + let opts = SelectorOptions { + bounds: RvBoundsMode::None, + signed_div_overflow_trap: false, + }; + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::LocalGet(1), + WasmOp::I32DivS, + WasmOp::End, + ], + 2, + opts, + ); + // Only the zero-divisor BNE; no overflow guards. + let bne_count = count(&out, |op| { + matches!( + op, + RiscVOp::Branch { + cond: Branch::Ne, + .. 
+ } + ) + }); + assert_eq!(bne_count, 1); + assert_eq!(count(&out, |op| matches!(op, RiscVOp::Ebreak)), 1); + } + + #[test] + fn rv32_unsigned_div_skips_overflow_guard() { + // Unsigned division has no INT_MIN/-1 special case. + let opts = SelectorOptions::wasm_compliant(); + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::LocalGet(1), + WasmOp::I32DivU, + WasmOp::End, + ], + 2, + opts, + ); + let bne_count = count(&out, |op| { + matches!( + op, + RiscVOp::Branch { + cond: Branch::Ne, + .. + } + ) + }); + assert_eq!(bne_count, 1, "only zero-divisor guard expected for div_u"); + } + + #[test] + fn rv32_signed_rem_also_gets_overflow_guard() { + let opts = SelectorOptions::wasm_compliant(); + let out = s_with_opts( + &[ + WasmOp::LocalGet(0), + WasmOp::LocalGet(1), + WasmOp::I32RemS, + WasmOp::End, + ], + 2, + opts, + ); + let bne_count = count(&out, |op| { + matches!( + op, + RiscVOp::Branch { + cond: Branch::Ne, + .. + } + ) + }); + assert!(bne_count >= 3); + assert!(count(&out, |op| matches!(op, RiscVOp::Rem { .. 
})) == 1); + } } diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs index 9ad5070..e271225 100644 --- a/crates/synth-backend/src/arm_backend.rs +++ b/crates/synth-backend/src/arm_backend.rs @@ -6,7 +6,7 @@ use crate::ArmEncoder; use synth_core::backend::{ Backend, BackendCapabilities, BackendError, CodeRelocation, CompilationResult, CompileConfig, - CompiledFunction, + CompiledFunction, SafetyBounds, }; use synth_core::target::{IsaVariant, TargetSpec}; use synth_core::wasm_decoder::DecodedModule; @@ -144,10 +144,11 @@ fn compile_wasm_to_arm( ) -> Result<(Vec, Vec), String> { let num_params = count_params(wasm_ops); - let bounds_config = if config.bounds_check { - BoundsCheckConfig::Software - } else { - BoundsCheckConfig::None + let bounds_config = match config.effective_safety_bounds() { + SafetyBounds::None => BoundsCheckConfig::None, + SafetyBounds::Mpu => BoundsCheckConfig::Mpu, + SafetyBounds::Software => BoundsCheckConfig::Software, + SafetyBounds::Mask => BoundsCheckConfig::Masking, }; // Instruction selection: optimized or direct @@ -308,6 +309,68 @@ mod tests { assert!(func.relocations.is_empty()); } + // ─── Phase 1 safety-bounds plumbing for ARM ────────────────────────── + + #[test] + fn arm_safety_bounds_mpu_emits_same_code_as_none() { + // Mpu mode must not introduce any inline check on ARM — the MPU + // handles faults via hardware. The encoded bytes for an i32.load + // should be identical between None and Mpu. 
+ let backend = ArmBackend::new(); + let ops = vec![ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + ]; + let cfg_none = CompileConfig { + no_optimize: true, + ..Default::default() + }; + let cfg_mpu = CompileConfig { + no_optimize: true, + safety_bounds: SafetyBounds::Mpu, + ..Default::default() + }; + let n = backend.compile_function("ld", &ops, &cfg_none).unwrap(); + let m = backend.compile_function("ld", &ops, &cfg_mpu).unwrap(); + assert_eq!( + n.code, m.code, + "Mpu and None should produce identical ARM bytes (Mpu relies on hardware)" + ); + } + + #[test] + fn arm_legacy_bounds_check_still_emits_software_check() { + // Legacy CLI users with `--bounds-check` should keep getting the + // software path even though the new SafetyBounds field defaults to None. + let backend = ArmBackend::new(); + let ops = vec![ + WasmOp::LocalGet(0), + WasmOp::I32Load { + offset: 0, + align: 2, + }, + ]; + let cfg_legacy = CompileConfig { + no_optimize: true, + bounds_check: true, + ..Default::default() + }; + let cfg_software = CompileConfig { + no_optimize: true, + safety_bounds: SafetyBounds::Software, + ..Default::default() + }; + let l = backend.compile_function("ld", &ops, &cfg_legacy).unwrap(); + let s = backend.compile_function("ld", &ops, &cfg_software).unwrap(); + assert_eq!( + l.code, s.code, + "--bounds-check should produce the same bytes as --safety-bounds=software" + ); + } + // ======================================================================== // ISA feature gate tests — ensure the compiler never emits unsupported // instructions for a given target diff --git a/crates/synth-cli/src/main.rs b/crates/synth-cli/src/main.rs index ac312f9..489d82a 100644 --- a/crates/synth-cli/src/main.rs +++ b/crates/synth-cli/src/main.rs @@ -14,7 +14,8 @@ use synth_backend::{ VectorTable, W2C2Backend, }; use synth_core::HardwareCapabilities; -use synth_core::backend::{Backend, BackendRegistry, CompileConfig}; +use synth_core::SafetyManifest; +use 
synth_core::backend::{Backend, BackendRegistry, CompileConfig, SafetyBounds}; use synth_core::target::TargetSpec; use synth_core::wasm_decoder::ImportEntry; use synth_synthesis::{FunctionOps, WasmMemory, WasmOp, decode_wasm_functions, decode_wasm_module}; @@ -150,11 +151,21 @@ enum Commands { #[arg(long)] loom: bool, - /// Enable software bounds checking for memory operations - /// Generates CMP/BHS before each load/store (~25% overhead) + /// DEPRECATED: alias for `--safety-bounds software`. Will be removed + /// in a future release. Prints a deprecation notice when used. #[arg(long)] bounds_check: bool, + /// Memory bounds safety profile (Phase 1 of binary-safety design). + /// + /// Accepted values: + /// - `none` — no inline check, no MPU/PMP setup (fastest, unsafe) + /// - `mpu` — rely on ARM MPU / RV32 PMP hardware enforcement + /// - `software` — emit CMP/BHS (ARM) or BGEU+EBREAK (RV32) per access + /// - `mask` — AND addr with `mem_size - 1` (requires power-of-two size) + #[arg(long, value_name = "MODE")] + safety_bounds: Option, + /// Compilation backend (arm, w2c2, awsm, wasker) #[arg(short, long, default_value = "arm")] backend: String, @@ -281,6 +292,7 @@ fn main() -> Result<()> { loom_compat, loom, bounds_check, + safety_bounds, backend, verify, link, @@ -295,6 +307,12 @@ fn main() -> Result<()> { // --loom implies --loom-compat (skip redundant synth passes) let loom_compat = loom_compat || loom; + // Phase 1 safety-bounds resolution. `--safety-bounds` takes + // precedence; `--bounds-check` is the legacy alias and emits a + // single-line deprecation notice when used. 
+ let resolved_safety_bounds = + resolve_safety_bounds(safety_bounds.as_deref(), bounds_check)?; + compile_command( input, output.clone(), @@ -306,7 +324,7 @@ fn main() -> Result<()> { no_optimize, loom_compat, loom, - bounds_check, + resolved_safety_bounds, &backend, verify, &target_spec, @@ -722,6 +740,63 @@ fn maybe_run_loom(enabled: bool, wasm_bytes: Vec) -> Result> { ); } +/// Reconcile `--safety-bounds` and the legacy `--bounds-check` flag. Prints a +/// one-line deprecation notice when the legacy flag is used. Phase 1 of +/// `docs/binary-safety-design.md` §2 (CLI surface). +fn resolve_safety_bounds( + safety_bounds: Option<&str>, + legacy_bounds_check: bool, +) -> Result { + if let Some(v) = safety_bounds { + let parsed = SafetyBounds::parse(v).map_err(|e| anyhow::anyhow!(e))?; + if legacy_bounds_check { + eprintln!( + "warning: --bounds-check is deprecated; --safety-bounds={} takes precedence", + parsed.as_str() + ); + } + return Ok(parsed); + } + if legacy_bounds_check { + eprintln!("warning: --bounds-check is deprecated; use --safety-bounds=software instead"); + return Ok(SafetyBounds::Software); + } + Ok(SafetyBounds::None) +} + +/// Emit the `safety-manifest.json` sidecar when any safety knob is active. +/// Phase 1 only records bounds + division traps; later phases will extend +/// the schema. Silently no-ops when `safety_bounds == None` (the default, +/// for back-compat with callers that don't opt in). +fn maybe_emit_safety_manifest( + elf_path: &std::path::Path, + target_spec: &TargetSpec, + safety_bounds: SafetyBounds, + linear_memory_bytes: u32, +) -> Result<()> { + if safety_bounds == SafetyBounds::None { + return Ok(()); + } + let manifest = SafetyManifest { + synth_version: env!("CARGO_PKG_VERSION").to_string(), + target_triple: target_spec.triple.clone(), + safety_bounds, + // Phase 1: div-by-zero and signed-div-overflow are always enabled + // for WASM-compliant output on both backends. 
The columns will gain + // independent knobs in Phase 2 when `--safety-div` / `--safety-div-overflow` + // CLI flags land. + safety_div_zero: true, + safety_div_overflow: true, + linear_memory_bytes, + }; + let sidecar = SafetyManifest::sidecar_path(elf_path); + let json = manifest.to_json(); + std::fs::write(&sidecar, json) + .with_context(|| format!("Failed to write safety manifest: {}", sidecar.display()))?; + info!("Wrote safety manifest: {}", sidecar.display()); + Ok(()) +} + #[allow(clippy::too_many_arguments)] fn compile_command( input: Option, @@ -734,7 +809,7 @@ fn compile_command( no_optimize: bool, loom_compat: bool, loom: bool, - bounds_check: bool, + safety_bounds: SafetyBounds, backend_name: &str, verify: bool, target_spec: &TargetSpec, @@ -777,7 +852,7 @@ fn compile_command( no_optimize, loom_compat, loom, - bounds_check, + safety_bounds, backend, verify, target_spec, @@ -882,7 +957,7 @@ fn compile_command( let config = CompileConfig { no_optimize, loom_compat, - bounds_check, + safety_bounds, target: target_spec.clone(), ..CompileConfig::default() }; @@ -912,6 +987,12 @@ fn compile_command( file.write_all(&elf_data) .context("Failed to write ELF data")?; + // Phase 1: write the safety-manifest sidecar whenever any safety knob + // is active. Single-function path uses 0 for linear-memory-bytes because + // the WASM was supplied as a raw function-body slice — `compile_all_exports` + // has the module context and threads through the real value. 
+ maybe_emit_safety_manifest(&output, target_spec, safety_bounds, 0)?; + println!("Compiled {} to {}", func_name, output.display()); println!(" Code size: {} bytes", code.len()); println!(" ELF size: {} bytes", elf_data.len()); @@ -1407,7 +1488,7 @@ fn compile_all_exports( no_optimize: bool, loom_compat: bool, loom: bool, - bounds_check: bool, + safety_bounds: SafetyBounds, backend: &dyn Backend, verify: bool, target_spec: &TargetSpec, @@ -1561,7 +1642,7 @@ fn compile_all_exports( let config = CompileConfig { no_optimize, loom_compat, - bounds_check, + safety_bounds, num_imports: max_num_imported_funcs, target: target_spec.clone(), ..CompileConfig::default() @@ -1651,6 +1732,11 @@ fn compile_all_exports( file.write_all(&elf_data) .context("Failed to write ELF data")?; + // Phase 1: emit safety-manifest.json next to the ELF when any + // safety knob is active. + let linear_mem_bytes = all_memories.first().map(|m| m.initial_bytes()).unwrap_or(0); + maybe_emit_safety_manifest(&output, target_spec, safety_bounds, linear_mem_bytes)?; + let total_code: usize = compiled_funcs.iter().map(|f| f.code.len()).sum(); let total_relocs: usize = compiled_funcs.iter().map(|f| f.relocations.len()).sum(); println!( diff --git a/crates/synth-core/src/backend.rs b/crates/synth-core/src/backend.rs index 32e6939..2088fce 100644 --- a/crates/synth-core/src/backend.rs +++ b/crates/synth-core/src/backend.rs @@ -25,6 +25,53 @@ pub enum BackendError { ExternalToolError(String), } +/// Memory-bounds safety strategy. Phase 1 of `docs/binary-safety-design.md` §3.1. +/// +/// - `Mpu`/PMP: rely on hardware (ARM MPU or RV32 PMP) — no inline check. +/// - `Software`: emit a `CMP/BHS Trap_Handler` (ARM) or `bgeu addr, mem_size, ebreak` (RV32) +/// before every load/store. +/// - `Mask`: emit `AND addr, addr, #(mem_size - 1)` — only valid when memory size +/// is a power of two. Wraps on OOB rather than trapping (fuzz-profile semantics). +/// - `None`: no bounds enforcement. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SafetyBounds { + /// No bounds check (caller assumes the WASM module is trusted) + #[default] + None, + /// ARM MPU / RV32 PMP — hardware enforcement, no inline guard + Mpu, + /// Software CMP/BHS (ARM) or BGEU+EBREAK (RV32) per access + Software, + /// AND-mask, requires power-of-two memory size + Mask, +} + +impl SafetyBounds { + /// Parse the `--safety-bounds` argument value. + pub fn parse(s: &str) -> std::result::Result { + match s { + "none" => Ok(SafetyBounds::None), + "mpu" | "pmp" => Ok(SafetyBounds::Mpu), + "software" | "soft" => Ok(SafetyBounds::Software), + "mask" | "masking" => Ok(SafetyBounds::Mask), + other => Err(format!( + "unknown --safety-bounds value '{}'; expected one of: none, mpu, software, mask", + other + )), + } + } + + /// String form used in the safety manifest. + pub fn as_str(self) -> &'static str { + match self { + SafetyBounds::None => "none", + SafetyBounds::Mpu => "mpu", + SafetyBounds::Software => "software", + SafetyBounds::Mask => "mask", + } + } +} + /// Configuration for a compilation run #[derive(Debug, Clone)] pub struct CompileConfig { @@ -32,8 +79,15 @@ pub struct CompileConfig { pub opt_level: u8, /// Target specification pub target: TargetSpec, - /// Enable software bounds checking for memory operations + /// Legacy: enable software bounds checking for memory operations. + /// Deprecated in favor of `safety_bounds`. When set, equivalent to + /// `SafetyBounds::Software`. Kept for backwards compatibility with + /// callers that haven't migrated yet. pub bounds_check: bool, + /// Phase-1 unified safety-bounds knob. If `bounds_check` is `true` and + /// this is `None`, the legacy field wins (back-compat). If both are set, + /// `safety_bounds` wins. + pub safety_bounds: SafetyBounds, /// Hardware profile name (e.g. 
"nrf52840", "stm32f407") pub hardware: String, /// Skip optimization passes (direct instruction selection) @@ -44,12 +98,25 @@ pub struct CompileConfig { pub num_imports: u32, } +impl CompileConfig { + /// Resolve the effective safety-bounds setting, honouring the legacy + /// `bounds_check` field as a fallback. Used by backends to pick the + /// inline-check shape. + pub fn effective_safety_bounds(&self) -> SafetyBounds { + match (self.safety_bounds, self.bounds_check) { + (SafetyBounds::None, true) => SafetyBounds::Software, + (s, _) => s, + } + } +} + impl Default for CompileConfig { fn default() -> Self { Self { opt_level: 2, target: TargetSpec::cortex_m4(), bounds_check: false, + safety_bounds: SafetyBounds::None, hardware: String::new(), no_optimize: false, loom_compat: false, @@ -199,6 +266,37 @@ mod tests { let config = CompileConfig::default(); assert_eq!(config.opt_level, 2); assert!(!config.bounds_check); + assert_eq!(config.safety_bounds, SafetyBounds::None); assert!(!config.no_optimize); } + + #[test] + fn safety_bounds_parse_round_trip() { + for s in ["none", "mpu", "software", "mask"] { + let sb = SafetyBounds::parse(s).unwrap(); + assert_eq!(sb.as_str(), s); + } + assert_eq!(SafetyBounds::parse("pmp").unwrap(), SafetyBounds::Mpu); + assert_eq!(SafetyBounds::parse("soft").unwrap(), SafetyBounds::Software); + assert!(SafetyBounds::parse("nonsense").is_err()); + } + + #[test] + fn effective_safety_bounds_legacy_promotes_to_software() { + let cfg = CompileConfig { + bounds_check: true, + ..Default::default() + }; + assert_eq!(cfg.effective_safety_bounds(), SafetyBounds::Software); + } + + #[test] + fn effective_safety_bounds_new_field_wins() { + let cfg = CompileConfig { + bounds_check: true, + safety_bounds: SafetyBounds::Mpu, + ..Default::default() + }; + assert_eq!(cfg.effective_safety_bounds(), SafetyBounds::Mpu); + } } diff --git a/crates/synth-core/src/lib.rs b/crates/synth-core/src/lib.rs index c4fafdf..afbbcce 100644 --- 
a/crates/synth-core/src/lib.rs +++ b/crates/synth-core/src/lib.rs @@ -8,6 +8,7 @@ pub mod backend; pub mod component; pub mod error; pub mod ir; +pub mod safety_manifest; pub mod target; pub mod wasm_decoder; pub mod wasm_op; @@ -16,6 +17,7 @@ pub use backend::*; pub use component::*; pub use error::{Error, Result}; pub use ir::*; +pub use safety_manifest::SafetyManifest; pub use target::*; pub use wasm_decoder::{ DecodedModule, FunctionOps, ImportEntry, ImportKind, WasmMemory, decode_wasm_functions, diff --git a/crates/synth-core/src/safety_manifest.rs b/crates/synth-core/src/safety_manifest.rs new file mode 100644 index 0000000..6968d48 --- /dev/null +++ b/crates/synth-core/src/safety_manifest.rs @@ -0,0 +1,173 @@ +//! Safety manifest — sidecar JSON describing the runtime safety checks +//! baked into a compiled ELF. Phase 1 of `docs/binary-safety-design.md` §2. +//! +//! The manifest is intentionally small in Phase 1 (memory bounds + div traps). +//! Later phases extend it with stack-overflow, CFI, prologue, poison, and +//! component-model knobs. The schema is forward-compatible: consumers should +//! treat unknown keys as informational and unknown values as opaque strings. +//! +//! Path convention: when the compiler emits `foo.elf`, it writes +//! `foo.safety-manifest.json` next to it. + +use crate::backend::SafetyBounds; +use std::path::{Path, PathBuf}; + +/// Compile-time safety profile that ends up in the manifest sidecar. +/// +/// Each field maps to one row of §3 in the binary-safety design doc; in +/// Phase 1 only `safety_bounds`, `safety_div_zero`, and +/// `safety_div_overflow` carry meaningful values. Other knobs are stubbed +/// as `false`/`"none"` so consumers see the schema shape and can detect +/// when a future synth release turns them on. +#[derive(Debug, Clone)] +pub struct SafetyManifest { + /// `CARGO_PKG_VERSION` of the synth release that produced the ELF. + pub synth_version: String, + /// LLVM-style target triple (e.g. 
`thumbv7em-none-eabi`, + /// `riscv32imac-unknown-none-elf`). + pub target_triple: String, + /// Memory-bounds enforcement strategy. + pub safety_bounds: SafetyBounds, + /// WASM-required divide-by-zero trap is emitted at every i*.div_* / + /// i*.rem_* site. Phase 1: always `true` on both backends. + pub safety_div_zero: bool, + /// WASM-required signed-division overflow (INT_MIN / -1) trap. Phase 1: + /// `true` on ARM (already shipped) and `true` on RV32 (added this phase). + pub safety_div_overflow: bool, + /// Linear-memory size in bytes that the bounds/mask check was sized for. + /// Recorded so an auditor can spot a manifest/ELF mismatch. + pub linear_memory_bytes: u32, +} + +impl SafetyManifest { + /// Serialise to pretty-printed JSON. Deliberately handwritten to avoid + /// pulling `serde_derive` into `synth-core` (which is upstream of every + /// backend and the proof-extraction toolchain). + pub fn to_json(&self) -> String { + let mut out = String::new(); + out.push_str("{\n"); + out.push_str(&format!( + " \"synth_version\": {},\n", + json_string(&self.synth_version) + )); + out.push_str(&format!( + " \"target_triple\": {},\n", + json_string(&self.target_triple) + )); + out.push_str(&format!( + " \"safety_bounds\": {},\n", + json_string(self.safety_bounds.as_str()) + )); + out.push_str(&format!( + " \"safety_div_zero\": {},\n", + self.safety_div_zero + )); + out.push_str(&format!( + " \"safety_div_overflow\": {},\n", + self.safety_div_overflow + )); + out.push_str(&format!( + " \"linear_memory_bytes\": {}\n", + self.linear_memory_bytes + )); + out.push_str("}\n"); + out + } + + /// Compute the manifest path next to an output ELF: replaces the file + /// extension with `.safety-manifest.json` (or appends it if no extension). 
+ /// Examples: + /// - `foo.elf` → `foo.safety-manifest.json` + /// - `out` → `out.safety-manifest.json` + pub fn sidecar_path(elf_path: &Path) -> PathBuf { + let mut p = elf_path.to_path_buf(); + // `set_extension` clobbers the dot before the extension, so emit + // a compound suffix explicitly. + let stem = elf_path + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "out".to_string()); + p.set_file_name(format!("{}.safety-manifest.json", stem)); + p + } +} + +/// Minimal JSON-string escaper sufficient for our short, ASCII-only fields. +fn json_string(s: &str) -> String { + let mut out = String::with_capacity(s.len() + 2); + out.push('"'); + for c in s.chars() { + match c { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => { + out.push_str(&format!("\\u{:04x}", c as u32)); + } + c => out.push(c), + } + } + out.push('"'); + out +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn sample() -> SafetyManifest { + SafetyManifest { + synth_version: "0.3.1".to_string(), + target_triple: "thumbv7em-none-eabi".to_string(), + safety_bounds: SafetyBounds::Mpu, + safety_div_zero: true, + safety_div_overflow: true, + linear_memory_bytes: 65536, + } + } + + #[test] + fn sidecar_path_strips_elf_extension() { + let p = SafetyManifest::sidecar_path(&PathBuf::from("/tmp/foo.elf")); + assert_eq!(p, PathBuf::from("/tmp/foo.safety-manifest.json")); + } + + #[test] + fn sidecar_path_handles_missing_extension() { + let p = SafetyManifest::sidecar_path(&PathBuf::from("out")); + assert_eq!(p, PathBuf::from("out.safety-manifest.json")); + } + + #[test] + fn json_round_trip_shape() { + let json = sample().to_json(); + assert!(json.contains("\"synth_version\": \"0.3.1\"")); + assert!(json.contains("\"target_triple\": \"thumbv7em-none-eabi\"")); + assert!(json.contains("\"safety_bounds\": \"mpu\"")); + 
assert!(json.contains("\"safety_div_zero\": true")); + assert!(json.contains("\"safety_div_overflow\": true")); + assert!(json.contains("\"linear_memory_bytes\": 65536")); + } + + #[test] + fn json_escapes_quotes_in_triple() { + let m = SafetyManifest { + target_triple: "weird-\"quoted\"-triple".to_string(), + ..sample() + }; + let json = m.to_json(); + assert!(json.contains("weird-\\\"quoted\\\"-triple")); + } + + #[test] + fn json_none_bounds_serialises() { + let m = SafetyManifest { + safety_bounds: SafetyBounds::None, + ..sample() + }; + assert!(m.to_json().contains("\"safety_bounds\": \"none\"")); + } +} diff --git a/crates/synth-synthesis/src/instruction_selector.rs b/crates/synth-synthesis/src/instruction_selector.rs index 6e5c41d..8a5e5f6 100644 --- a/crates/synth-synthesis/src/instruction_selector.rs +++ b/crates/synth-synthesis/src/instruction_selector.rs @@ -16,8 +16,13 @@ use synth_core::target::FPUPrecision; /// Bounds checking configuration for memory operations #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BoundsCheckConfig { - /// No bounds checking (relies on MPU or other hardware protection) + /// No bounds checking (no inline guard). None, + /// Hardware MPU enforcement (ARM) / PMP (RV32). The backend does not emit + /// any inline check — the MPU/PMP raises a fault on out-of-bounds accesses. + /// Equivalent to `None` from the selector's point of view but distinguished + /// here so the safety-manifest can record the intent. + Mpu, /// Software bounds checking with CMP/BHS before each access /// R10 holds the memory size, initialized by startup code Software, @@ -26,6 +31,14 @@ pub enum BoundsCheckConfig { Masking, } +impl BoundsCheckConfig { + /// `true` when no inline guard instructions are emitted (either disabled + /// entirely or hardware-enforced via MPU/PMP). 
+ pub fn is_passthrough(self) -> bool { + matches!(self, BoundsCheckConfig::None | BoundsCheckConfig::Mpu) + } +} + /// ARM instruction with operands #[derive(Debug, Clone, PartialEq)] pub struct ArmInstruction { @@ -2962,7 +2975,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![load_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![load_op], BoundsCheckConfig::Software => { // Software bounds check: verify last byte of access is in bounds // ADD temp, addr_reg, #(offset + access_size - 1) @@ -3026,7 +3039,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![store_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![store_op], BoundsCheckConfig::Software => { // Software bounds check: verify last byte of access is in bounds let temp = Reg::R12; @@ -3082,7 +3095,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![load_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![load_op], BoundsCheckConfig::Software => { // Software bounds check: verify last byte of 8-byte access is in bounds // ADD temp, addr_reg, #(offset + 8 - 1) @@ -3141,7 +3154,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![store_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![store_op], BoundsCheckConfig::Software => { // Software bounds check: verify last byte of 8-byte access is in bounds let temp = Reg::R12; @@ -3195,7 +3208,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![load_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![load_op], BoundsCheckConfig::Software => { let temp = Reg::R12; let end_offset = offset + (access_size as i32) - 1; @@ -3248,7 +3261,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![store_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => 
vec![store_op], BoundsCheckConfig::Software => { let temp = Reg::R12; let end_offset = offset + (access_size as i32) - 1; @@ -3309,7 +3322,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![load_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![load_op], BoundsCheckConfig::Software => { let temp = Reg::R12; let end_offset = offset + (access_size as i32) - 1; @@ -3375,7 +3388,7 @@ impl InstructionSelector { }; match self.bounds_check { - BoundsCheckConfig::None => vec![store_op], + BoundsCheckConfig::None | BoundsCheckConfig::Mpu => vec![store_op], BoundsCheckConfig::Software => { let temp = Reg::R12; let end_offset = offset + (access_size as i32) - 1; @@ -3441,7 +3454,7 @@ impl InstructionSelector { /// Issue #95: replaces 10-byte `MOVW+MOVT+LDR.W` with a 4-byte `LDR.W` /// for static-address loads/stores. fn try_fold_const_addr(&self, wasm_ops: &[WasmOp], idx: usize, offset: u32) -> Option { - if !matches!(self.bounds_check, BoundsCheckConfig::None) { + if !self.bounds_check.is_passthrough() { return None; } if idx == 0 { @@ -3486,7 +3499,7 @@ impl InstructionSelector { idx: usize, offset: u32, ) -> Option { - if !matches!(self.bounds_check, BoundsCheckConfig::None) { + if !self.bounds_check.is_passthrough() { return None; } if idx < 2 {