From f76c679a0e69739c7827241767e0059409c4fba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Walter?= Date: Tue, 21 Apr 2026 15:28:11 +0200 Subject: [PATCH] Expose V4 audit committed fields to Python via deserialize_v4_audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit verify_v4_binary cryptographically binds the audit binary to a specific output_text (via the detokenization check) but does not surface that text to the caller. Relay-layer callers — servers that receive an audit binary alongside a client-declared output — currently have to either (a) re-implement the binary wire format to read the committed fields, or (b) trust a parallel "claimed output" value the client supplies on the wire. Option (b) is what opens a re-binding gap: a valid binary can be re-attached to arbitrary new outputs because the server never consults the binary's own attested output. This change adds verilm_rs.deserialize_v4_audit(audit_binary) which returns the publicly-committed fields as a Python dict: output_text, prompt, n_prompt_tokens, prefix_token_ids, token_index, token_id, plus a nested commitment dict with n_tokens, version, and the spec / manifest / prompt / seed hashes. Internal verification state (Merkle proofs, retained state, shell openings, KV transcripts, prefix embeddings, witnessed scores) is intentionally omitted. With this, a relay server can drop the client-supplied "claimed output" from its wire protocol entirely and treat the binary as the single source of truth: verify_v4_binary(bin, key) first, then deserialize_v4_audit(bin)["output_text"] as the canonical content. 
--- CHANGELOG.md | 6 + crates/verilm-py/src/lib.rs | 119 ++++++++++++++++++ .../tests/test_deserialize_v4_audit.py | 35 ++++++ 3 files changed, 160 insertions(+) create mode 100644 crates/verilm-py/tests/test_deserialize_v4_audit.py diff --git a/CHANGELOG.md b/CHANGELOG.md index db5a780..a6c0503 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ This changelog tracks the kept canonical VeriLM protocol and its major implement Historical references below to “roadmap #N” refer to the pre-2026-03-30 roadmap numbering. On 2026-03-30 the roadmap was renumbered into a single linear open-items-only sequence. +## 2026-04-21 + +### Added + +- **`verilm_rs.deserialize_v4_audit(audit_binary) -> dict`** in `verilm-py`. Exposes the publicly-committed fields of a V4 audit binary (`output_text`, `prompt`, `prompt_hash`, `input/model/decode/output_spec_hash`, `manifest_hash`, `n_tokens`, etc.) as a Python dict. Internal verification state (Merkle proofs, retained state, shell openings, KV entries/roots/proofs, prefix embeddings, witnessed scores) is intentionally not surfaced. Enables relay-layer callers that already trust the binary (having passed `verify_v4_binary`) to extract the committed output text directly rather than accepting a parallel "claimed output" value from the client. + ## 2026-04-20 ### Measured diff --git a/crates/verilm-py/src/lib.rs b/crates/verilm-py/src/lib.rs index 7ac034e..a14b635 100644 --- a/crates/verilm-py/src/lib.rs +++ b/crates/verilm-py/src/lib.rs @@ -1725,6 +1725,124 @@ fn run_verify_with_artifact( verilm_verify::verify_v4_full_with_artifact(key, response, decode_artifact, tok_ref, detok_ref) } +/// Deserialize a V4 audit binary and expose its publicly committed +/// fields as a Python dict. +/// +/// The `V4AuditResponse` struct is cryptographically bound by +/// `verify_v4_binary`: callers who want a trustworthy view of the +/// committed values should run `verify_v4_binary(audit_binary, key)` +/// first, then read this dict. 
Fields that are verification internals +/// (Merkle proofs, retained state, shell openings, KV +/// entries/roots/proofs, prefix embeddings, witnessed scores) are +/// intentionally **not** surfaced — they are consumed by the verifier +/// and have no meaning to downstream callers. +/// +/// Returned dict shape (every key is always present; optional values +/// are `None` when not populated by the prover): +/// +/// ```text +/// { +/// "token_index": int, +/// "token_id": int, +/// "prev_io_hash": bytes, # 32 bytes +/// "revealed_seed": bytes, # 32 bytes +/// "output_text": str | None, +/// "prompt": bytes | None, +/// "n_prompt_tokens": int | None, +/// "prefix_token_ids": list[int], +/// "commitment": { +/// "version": str, # e.g. "v4" +/// "n_tokens": int, +/// "merkle_root": bytes, # 32 bytes +/// "io_root": bytes, # 32 bytes +/// "prompt_hash": bytes | None, +/// "seed_commitment": bytes | None, +/// "manifest_hash": bytes | None, +/// "input_spec_hash": bytes | None, +/// "model_spec_hash": bytes | None, +/// "decode_spec_hash": bytes | None, +/// "output_spec_hash": bytes | None, +/// "n_prompt_tokens": int | None, +/// } +/// } +/// ``` +/// +/// Args: +/// audit_binary: bytes — V4AuditResponse binary payload. +/// +/// Returns: +/// dict of publicly-committed fields. +/// +/// Raises: +/// ValueError — if `audit_binary` cannot be deserialized. 
+#[pyfunction] +fn deserialize_v4_audit<'py>( + py: Python<'py>, + audit_binary: &[u8], +) -> PyResult> { + let r = verilm_core::serialize::deserialize_v4_audit(audit_binary) + .map_err(|e| PyValueError::new_err(format!("failed to deserialize V4 binary: {}", e)))?; + + fn opt_bytes_32<'py>( + py: Python<'py>, + v: Option<[u8; 32]>, + ) -> Option> { + v.map(|h| PyBytes::new(py, &h)) + } + + let commitment = PyDict::new(py); + commitment.set_item( + "version", + match r.commitment.version { + verilm_core::types::CommitmentVersion::V4 => "v4", + }, + )?; + commitment.set_item("n_tokens", r.commitment.n_tokens)?; + commitment.set_item("merkle_root", PyBytes::new(py, &r.commitment.merkle_root))?; + commitment.set_item("io_root", PyBytes::new(py, &r.commitment.io_root))?; + commitment.set_item("prompt_hash", opt_bytes_32(py, r.commitment.prompt_hash))?; + commitment.set_item( + "seed_commitment", + opt_bytes_32(py, r.commitment.seed_commitment), + )?; + commitment.set_item( + "manifest_hash", + opt_bytes_32(py, r.commitment.manifest_hash), + )?; + commitment.set_item( + "input_spec_hash", + opt_bytes_32(py, r.commitment.input_spec_hash), + )?; + commitment.set_item( + "model_spec_hash", + opt_bytes_32(py, r.commitment.model_spec_hash), + )?; + commitment.set_item( + "decode_spec_hash", + opt_bytes_32(py, r.commitment.decode_spec_hash), + )?; + commitment.set_item( + "output_spec_hash", + opt_bytes_32(py, r.commitment.output_spec_hash), + )?; + commitment.set_item("n_prompt_tokens", r.commitment.n_prompt_tokens)?; + + let dict = PyDict::new(py); + dict.set_item("token_index", r.token_index)?; + dict.set_item("token_id", r.token_id)?; + dict.set_item("prev_io_hash", PyBytes::new(py, &r.prev_io_hash))?; + dict.set_item("revealed_seed", PyBytes::new(py, &r.revealed_seed))?; + dict.set_item("output_text", r.output_text.as_deref())?; + dict.set_item( + "prompt", + r.prompt.as_deref().map(|b| PyBytes::new(py, b)), + )?; + dict.set_item("n_prompt_tokens", r.n_prompt_tokens)?; + 
dict.set_item("prefix_token_ids", r.prefix_token_ids.clone())?; + dict.set_item("commitment", commitment)?; + Ok(dict) +} + /// Verify that externally-computed prompt token IDs match the committed token chain. /// /// The caller tokenizes the raw prompt using the committed InputSpec and passes @@ -2617,6 +2735,7 @@ fn verilm_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(diagnose_cast_order, m)?)?; m.add_class::()?; m.add_function(wrap_pyfunction!(verify_input_tokenization, m)?)?; + m.add_function(wrap_pyfunction!(deserialize_v4_audit, m)?)?; m.add_function(wrap_pyfunction!(deterministic_attention_bf16, m)?)?; Ok(()) } diff --git a/crates/verilm-py/tests/test_deserialize_v4_audit.py b/crates/verilm-py/tests/test_deserialize_v4_audit.py new file mode 100644 index 0000000..b10fcd5 --- /dev/null +++ b/crates/verilm-py/tests/test_deserialize_v4_audit.py @@ -0,0 +1,35 @@ +"""Tests for verilm_rs.deserialize_v4_audit — exposes the publicly-committed +fields of a V4 audit binary as a Python dict so downstream callers can +inspect `output_text`, commitment hashes, and prompt bytes without +re-implementing the binary wire format. + +Full-path tests that exercise a real V4 audit binary end-to-end live in +the Rust-side prover/verifier integration tests. These tests focus on +the Python surface: symbol presence, error shape on malformed input. 
+""" + +import pytest +import verilm_rs + + +class TestDeserializeV4Audit: + def test_symbol_is_exported(self): + """The function must be registered on the module.""" + assert hasattr(verilm_rs, "deserialize_v4_audit") + assert callable(verilm_rs.deserialize_v4_audit) + + def test_rejects_non_audit_bytes(self): + """Garbage bytes should raise a ValueError, not crash or + return a half-constructed dict.""" + with pytest.raises(ValueError, match="deserialize"): + verilm_rs.deserialize_v4_audit(b"not-a-real-audit-binary") + + def test_rejects_empty_bytes(self): + with pytest.raises(ValueError, match="deserialize"): + verilm_rs.deserialize_v4_audit(b"") + + def test_rejects_truncated_magic(self): + """A few bytes that don't match the binary header must fail + cleanly.""" + with pytest.raises(ValueError, match="deserialize"): + verilm_rs.deserialize_v4_audit(b"\x00\x01\x02\x03")