From f76c679a0e69739c7827241767e0059409c4fba6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Walter?= <clement0walter@gmail.com>
Date: Tue, 21 Apr 2026 15:28:11 +0200
Subject: [PATCH] Expose V4 audit committed fields to Python via
 deserialize_v4_audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

verify_v4_binary cryptographically binds the audit binary to a specific
output_text (via the detokenization check) but does not surface that
text to the caller. Relay-layer callers — servers that receive an
audit binary alongside a client-declared output — currently have to
either (a) re-implement the binary wire format to read the committed
fields, or (b) trust a parallel "claimed output" value the client
supplies on the wire. Option (b) is what opens a re-binding gap: a
valid binary can be re-attached to arbitrary new outputs because the
server never consults the binary's own attested output.

This change adds verilm_rs.deserialize_v4_audit(audit_binary) which
returns the publicly-committed fields as a Python dict: output_text,
prompt, n_prompt_tokens, prefix_token_ids, token_index, token_id,
prev_io_hash, revealed_seed, plus a nested commitment dict with
version, n_tokens, merkle_root, io_root, n_prompt_tokens, the spec /
manifest / prompt hashes, and the seed commitment. Internal
verification state (Merkle proofs, retained state, shell openings, KV
transcripts, prefix embeddings, witnessed scores) is intentionally
omitted.

With this, a relay server can drop the client-supplied "claimed output"
from its wire protocol entirely and treat the binary as the single
source of truth: verify_v4_binary(bin, key) first, then
deserialize_v4_audit(bin)["output_text"] as the canonical content.
---
 CHANGELOG.md                                  |   6 +
 crates/verilm-py/src/lib.rs                   | 119 ++++++++++++++++++
 .../tests/test_deserialize_v4_audit.py        |  35 ++++++
 3 files changed, 160 insertions(+)
 create mode 100644 crates/verilm-py/tests/test_deserialize_v4_audit.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db5a780..a6c0503 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ This changelog tracks the kept canonical VeriLM protocol and its major implement
 
 Historical references below to “roadmap #N” refer to the pre-2026-03-30 roadmap numbering. On 2026-03-30 the roadmap was renumbered into a single linear open-items-only sequence.
 
+## 2026-04-21
+
+### Added
+
+- **`verilm_rs.deserialize_v4_audit(audit_binary) -> dict`** in `verilm-py`. Exposes the publicly-committed fields of a V4 audit binary (`output_text`, `prompt`, `prompt_hash`, `input/model/decode/output_spec_hash`, `manifest_hash`, `n_tokens`, etc.) as a Python dict. Internal verification state (Merkle proofs, retained state, shell openings, KV entries/roots/proofs, prefix embeddings, witnessed scores) is intentionally not surfaced. Enables relay-layer callers that already trust the binary (having passed `verify_v4_binary`) to extract the committed output text directly rather than accepting a parallel "claimed output" value from the client.
+
 ## 2026-04-20
 
 ### Measured
diff --git a/crates/verilm-py/src/lib.rs b/crates/verilm-py/src/lib.rs
index 7ac034e..a14b635 100644
--- a/crates/verilm-py/src/lib.rs
+++ b/crates/verilm-py/src/lib.rs
@@ -1725,6 +1725,124 @@ fn run_verify_with_artifact(
     verilm_verify::verify_v4_full_with_artifact(key, response, decode_artifact, tok_ref, detok_ref)
 }
 
+/// Deserialize a V4 audit binary and expose its publicly committed
+/// fields as a Python dict.
+///
+/// This function only deserializes the payload and copies fields out;
+/// it does not call `verify_v4_binary`. Callers who want a trustworthy
+/// view of the committed values should run
+/// `verify_v4_binary(audit_binary, key)` first, then read this dict.
+/// Fields that are verification internals (Merkle proofs, retained
+/// state, shell openings, KV entries/roots/proofs, prefix embeddings,
+/// witnessed scores) are intentionally **not** surfaced — they are
+/// consumed by the verifier and have no meaning to downstream callers.
+///
+/// Returned dict shape (every key is always present; optional fields
+/// are `None` when not populated by the prover):
+///
+/// ```text
+/// {
+///   "token_index": int,
+///   "token_id": int,
+///   "prev_io_hash": bytes,               # 32 bytes
+///   "revealed_seed": bytes,              # 32 bytes
+///   "output_text": str | None,
+///   "prompt": bytes | None,
+///   "n_prompt_tokens": int | None,
+///   "prefix_token_ids": list[int],
+///   "commitment": {
+///       "version": str,                  # e.g. "v4"
+///       "n_tokens": int,
+///       "merkle_root": bytes,            # 32 bytes
+///       "io_root": bytes,                # 32 bytes
+///       "prompt_hash": bytes | None,
+///       "seed_commitment": bytes | None,
+///       "manifest_hash": bytes | None,
+///       "input_spec_hash": bytes | None,
+///       "model_spec_hash": bytes | None,
+///       "decode_spec_hash": bytes | None,
+///       "output_spec_hash": bytes | None,
+///       "n_prompt_tokens": int | None,
+///   }
+/// }
+/// ```
+///
+/// Args:
+///     audit_binary: bytes — V4AuditResponse binary payload.
+///
+/// Returns:
+///     dict of publicly-committed fields.
+///
+/// Raises:
+///     ValueError — if `audit_binary` cannot be deserialized.
+#[pyfunction]
+fn deserialize_v4_audit<'py>(
+    py: Python<'py>,
+    audit_binary: &[u8],
+) -> PyResult<Bound<'py, PyDict>> {
+    let r = verilm_core::serialize::deserialize_v4_audit(audit_binary)
+        .map_err(|e| PyValueError::new_err(format!("failed to deserialize V4 binary: {}", e)))?;
+
+    fn opt_bytes_32<'py>(
+        py: Python<'py>,
+        v: Option<[u8; 32]>,
+    ) -> Option<Bound<'py, PyBytes>> {
+        v.map(|h| PyBytes::new(py, &h))
+    }
+
+    let commitment = PyDict::new(py);
+    commitment.set_item(
+        "version",
+        match r.commitment.version {
+            verilm_core::types::CommitmentVersion::V4 => "v4",
+        },
+    )?;
+    commitment.set_item("n_tokens", r.commitment.n_tokens)?;
+    commitment.set_item("merkle_root", PyBytes::new(py, &r.commitment.merkle_root))?;
+    commitment.set_item("io_root", PyBytes::new(py, &r.commitment.io_root))?;
+    commitment.set_item("prompt_hash", opt_bytes_32(py, r.commitment.prompt_hash))?;
+    commitment.set_item(
+        "seed_commitment",
+        opt_bytes_32(py, r.commitment.seed_commitment),
+    )?;
+    commitment.set_item(
+        "manifest_hash",
+        opt_bytes_32(py, r.commitment.manifest_hash),
+    )?;
+    commitment.set_item(
+        "input_spec_hash",
+        opt_bytes_32(py, r.commitment.input_spec_hash),
+    )?;
+    commitment.set_item(
+        "model_spec_hash",
+        opt_bytes_32(py, r.commitment.model_spec_hash),
+    )?;
+    commitment.set_item(
+        "decode_spec_hash",
+        opt_bytes_32(py, r.commitment.decode_spec_hash),
+    )?;
+    commitment.set_item(
+        "output_spec_hash",
+        opt_bytes_32(py, r.commitment.output_spec_hash),
+    )?;
+    commitment.set_item("n_prompt_tokens", r.commitment.n_prompt_tokens)?;
+
+    let dict = PyDict::new(py);
+    dict.set_item("token_index", r.token_index)?;
+    dict.set_item("token_id", r.token_id)?;
+    dict.set_item("prev_io_hash", PyBytes::new(py, &r.prev_io_hash))?;
+    dict.set_item("revealed_seed", PyBytes::new(py, &r.revealed_seed))?;
+    dict.set_item("output_text", r.output_text.as_deref())?;
+    dict.set_item(
+        "prompt",
+        r.prompt.as_deref().map(|b| PyBytes::new(py, b)),
+    )?;
+    dict.set_item("n_prompt_tokens", r.n_prompt_tokens)?;
+    dict.set_item("prefix_token_ids", r.prefix_token_ids.clone())?;
+    dict.set_item("commitment", commitment)?;
+    Ok(dict)
+}
+
 /// Verify that externally-computed prompt token IDs match the committed token chain.
 ///
 /// The caller tokenizes the raw prompt using the committed InputSpec and passes
@@ -2617,6 +2735,7 @@ fn verilm_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(diagnose_cast_order, m)?)?;
     m.add_class::<CaptureHook>()?;
     m.add_function(wrap_pyfunction!(verify_input_tokenization, m)?)?;
+    m.add_function(wrap_pyfunction!(deserialize_v4_audit, m)?)?;
     m.add_function(wrap_pyfunction!(deterministic_attention_bf16, m)?)?;
     Ok(())
 }
diff --git a/crates/verilm-py/tests/test_deserialize_v4_audit.py b/crates/verilm-py/tests/test_deserialize_v4_audit.py
new file mode 100644
index 0000000..b10fcd5
--- /dev/null
+++ b/crates/verilm-py/tests/test_deserialize_v4_audit.py
@@ -0,0 +1,35 @@
+"""Tests for verilm_rs.deserialize_v4_audit — exposes the publicly-committed
+fields of a V4 audit binary as a Python dict so downstream callers can
+inspect `output_text`, commitment hashes, and prompt bytes without
+re-implementing the binary wire format.
+
+Full-path tests that exercise a real V4 audit binary end-to-end live in
+the Rust-side prover/verifier integration tests. These tests focus on
+the Python surface: symbol presence, error shape on malformed input.
+"""
+
+import pytest
+import verilm_rs
+
+
+class TestDeserializeV4Audit:
+    def test_symbol_is_exported(self):
+        """The function must be registered on the module."""
+        assert hasattr(verilm_rs, "deserialize_v4_audit")
+        assert callable(verilm_rs.deserialize_v4_audit)
+
+    def test_rejects_non_audit_bytes(self):
+        """Garbage bytes must raise ValueError, not crash or return a dict."""
+        with pytest.raises(ValueError, match="deserialize"):
+            verilm_rs.deserialize_v4_audit(b"not-a-real-audit-binary")
+
+    def test_rejects_empty_bytes(self):
+        """An empty payload must raise ValueError too."""
+        with pytest.raises(ValueError, match="deserialize"):
+            verilm_rs.deserialize_v4_audit(b"")
+
+    def test_rejects_truncated_magic(self):
+        """A few bytes that don't match the binary header must fail
+        cleanly."""
+        with pytest.raises(ValueError, match="deserialize"):
+            verilm_rs.deserialize_v4_audit(b"\x00\x01\x02\x03")