voocel
diff --git a/‎internal/agent/cache_monitor.go‎
Lines changed: 108 additions & 0 deletions b/‎internal/agent/cache_monitor.go‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎internal/agent/cache_monitor_test.go‎
Lines changed: 122 additions & 0 deletions b/‎internal/agent/cache_monitor_test.go‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎internal/agent/session.go‎
Lines changed: 2 additions & 0 deletions b/‎internal/agent/session.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎internal/agent/session_prompt.go‎
Lines changed: 6 additions & 0 deletions b/‎internal/agent/session_prompt.go‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎internal/agent/session_state.go‎
Lines changed: 78 additions & 0 deletions b/‎internal/agent/session_state.go‎
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,108 @@
+package agent
+
+import (
+	"hash/fnv"
+	"sort"
+
+	"github.com/voocel/agentcore"
+	"github.com/voocel/codebot/internal/storage"
+)
+
+// cacheSnapshot captures enough state about one LLM request to diagnose a
+// cache-break on the following turn. It does NOT store the request itself —
+// only cheap fingerprints of the inputs and the observed cache_read figure.
+type cacheSnapshot struct {
+	SystemHash      uint64 // fingerprint of the system prompt blocks (see hashSystemBlocks)
+	ToolsHash       uint64 // fingerprint of the active tool set (see hashTools)
+	CacheReadTokens int    // cache_read reported in the usage of the LLM call
+	Valid           bool   // false before the first turn
+}
+
+// Cache-break thresholds; both mirror Claude Code's heuristics.
+const (
+	// breakDropFraction is the minimum relative cache_read drop (5%).
+	breakDropFraction = 0.05
+	// breakDropAbsolute is the minimum token drop; it guards small contexts.
+	breakDropAbsolute = 2000
+)
+
+// hashSystemBlocks fingerprints the system prompt from each block's Text, in
+// order; cache_control metadata is ignored so a TTL flip is not a change.
+func hashSystemBlocks(blocks []agentcore.SystemBlock) uint64 {
+	sep := []byte{0} // written ahead of every block to mark its boundary
+	digest := fnv.New64a()
+	for i := range blocks {
+		digest.Write(sep)
+		digest.Write([]byte(blocks[i].Text))
+	}
+	return digest.Sum64()
+}
+
+// hashTools fingerprints the tool set as the LLM sees it. Tools are
+// deduplicated by name (the first occurrence wins), ordered by name, and
+// each contributes its name and description. Schema is excluded — changing
+// it means breaking cache, and the fingerprint is meant to reflect "same
+// name, same caller contract" rather than incidental schema reorderings.
+func hashTools(tools []agentcore.Tool) uint64 {
+	// Keep the first tool seen for every name.
+	firstByName := make(map[string]agentcore.Tool, len(tools))
+	ordered := make([]string, 0, len(tools))
+	for _, tool := range tools {
+		name := tool.Name()
+		if _, dup := firstByName[name]; !dup {
+			firstByName[name] = tool
+			ordered = append(ordered, name)
+		}
+	}
+	sort.Strings(ordered)
+
+	// Each entry is fed as 0x00 + name, then 0x01 + description, so the
+	// boundaries between entries and between the two fields stay marked.
+	digest := fnv.New64a()
+	for _, name := range ordered {
+		digest.Write(append([]byte{0}, name...))
+		digest.Write(append([]byte{1}, firstByName[name].Description()...))
+	}
+	return digest.Sum64()
+}
+
+// detectCacheBreak compares the cache_read of two consecutive snapshots and
+// returns a structured explanation when the drop is large enough to record:
+// at least breakDropAbsolute tokens and at least breakDropFraction of the
+// previous value. It returns nil when no break is detected, including when
+// prev offers no usable baseline.
+func detectCacheBreak(prev, curr cacheSnapshot) *storage.CacheBreakInfo {
+	// An invalid or zero previous cache_read gives nothing to drop from.
+	if !prev.Valid || prev.CacheReadTokens <= 0 {
+		return nil
+	}
+	// Size of the drop, in tokens and as a share of the previous figure.
+	lost := prev.CacheReadTokens - curr.CacheReadTokens
+	share := float64(lost) / float64(prev.CacheReadTokens)
+	// Both thresholds must be met for the drop to count as a break.
+	if lost < breakDropAbsolute || share < breakDropFraction {
+		return nil
+	}
+
+	report := &storage.CacheBreakInfo{
+		PrevCacheReadTokens: prev.CacheReadTokens,
+		CurrCacheReadTokens: curr.CacheReadTokens,
+		DropAbsolute:        lost,
+		DropFraction:        share,
+		SystemChanged:       prev.SystemHash != curr.SystemHash,
+		ToolsChanged:        prev.ToolsHash != curr.ToolsHash,
+	}
+
+	// Name the likely cause so the record is readable on its own.
+	switch sys, tools := report.SystemChanged, report.ToolsChanged; {
+	case sys && tools:
+		report.Note = "system prompt and tool set both changed"
+	case sys:
+		report.Note = "system prompt changed between turns"
+	case tools:
+		report.Note = "tool set changed between turns"
+	default:
+		report.Note = "no input change detected (TTL expiry, provider-side miss, or cache_control not honored by provider)"
+	}
+	return report
+}
@@ -0,0 +1,122 @@
+package agent
+
+import (
+	"testing"
+
+	"github.com/voocel/agentcore"
+)
+
+// Equal block lists must hash equally; a text edit must change the hash.
+func TestHashSystemBlocksStableAcrossCallOrder(t *testing.T) {
+	t.Parallel()
+	base := []agentcore.SystemBlock{{Text: "identity"}, {Text: "instructions"}}
+	same := []agentcore.SystemBlock{{Text: "identity"}, {Text: "instructions"}}
+	edited := []agentcore.SystemBlock{{Text: "identity"}, {Text: "instructions v2"}}
+	baseHash := hashSystemBlocks(base)
+	if got := hashSystemBlocks(same); got != baseHash {
+		t.Fatalf("identical blocks must hash to the same value")
+	}
+	if got := hashSystemBlocks(edited); got == baseHash {
+		t.Fatalf("differing text must change the hash")
+	}
+}
+
+// Blocks that differ only in CacheControl must share one fingerprint.
+func TestHashSystemBlocksIgnoresCacheControl(t *testing.T) {
+	t.Parallel()
+	tagged := hashSystemBlocks([]agentcore.SystemBlock{{Text: "identity", CacheControl: "ephemeral"}})
+	plain := hashSystemBlocks([]agentcore.SystemBlock{{Text: "identity"}})
+	if tagged != plain {
+		t.Fatalf("cache_control metadata must not influence the fingerprint")
+	}
+}
+
+// The same tools in a different slice order must produce the same hash.
+func TestHashToolsSortsByName(t *testing.T) {
+	t.Parallel()
+	read, bash := &stubTool{name: "read", desc: "read files"}, &stubTool{name: "bash", desc: "run shell"}
+	forward, reverse := []agentcore.Tool{read, bash}, []agentcore.Tool{bash, read}
+	if hashTools(forward) != hashTools(reverse) {
+		t.Fatalf("tool ordering must not affect the fingerprint")
+	}
+}
+
+// An invalid previous snapshot leaves nothing to compare against.
+func TestDetectCacheBreakNilWhenNoBaseline(t *testing.T) {
+	t.Parallel()
+	curr := cacheSnapshot{Valid: true, CacheReadTokens: 5000}
+	if info := detectCacheBreak(cacheSnapshot{}, curr); info != nil {
+		t.Fatalf("no baseline should suppress detection, got %+v", info)
+	}
+}
+
+// A drop that misses either threshold (relative or absolute) must not be
+// reported as a break.
+func TestDetectCacheBreakIgnoresSmallDrops(t *testing.T) {
+	t.Parallel()
+
+	snap := func(read int) cacheSnapshot { return cacheSnapshot{Valid: true, CacheReadTokens: read} }
+
+	// 4000 tokens is enough in absolute terms, but only 4% of the baseline.
+	if info := detectCacheBreak(snap(100000), snap(96000)); info != nil {
+		t.Fatalf("small drop should not trigger, got %+v", info)
+	}
+
+	// 10% is enough in relative terms, but only 1000 tokens were lost.
+	if info := detectCacheBreak(snap(10000), snap(9000)); info != nil {
+		t.Fatalf("absolute drop below threshold should not trigger, got %+v", info)
+	}
+}
+
+// A full cache_read drop with only SystemHash differing is a system-only break.
+func TestDetectCacheBreakReportsSystemChange(t *testing.T) {
+	t.Parallel()
+	before := cacheSnapshot{Valid: true, SystemHash: 1, ToolsHash: 2, CacheReadTokens: 50000}
+	after := before
+	after.SystemHash, after.CacheReadTokens = 9, 0
+
+	info := detectCacheBreak(before, after)
+	switch {
+	case info == nil:
+		t.Fatal("expected a cache break to be reported")
+	case !info.SystemChanged || info.ToolsChanged:
+		t.Fatalf("expected system-only change, got %+v", info)
+	case info.DropAbsolute != 50000:
+		t.Fatalf("drop_absolute = %d, want 50000", info.DropAbsolute)
+	case info.Note == "":
+		t.Fatal("note should be populated so logs are self-describing")
+	}
+}
+
+// A drop with identical hashes is still a break, with neither flag set.
+func TestDetectCacheBreakReportsUnknownWhenHashesMatch(t *testing.T) {
+	t.Parallel()
+	before := cacheSnapshot{Valid: true, SystemHash: 1, ToolsHash: 2, CacheReadTokens: 80000}
+	after := cacheSnapshot{Valid: true, SystemHash: 1, ToolsHash: 2}
+	info := detectCacheBreak(before, after)
+	if info == nil {
+		t.Fatal("expected a break to be reported even without hash diffs")
+	}
+	if info.SystemChanged || info.ToolsChanged {
+		t.Fatalf("expected neither hash to flip, got %+v", info)
+	}
+}
+
+// Only a compaction that actually changed the prompt may drop the baseline.
+func TestCompactionEventInvalidatesCacheBaseline(t *testing.T) {
+	t.Parallel()
+
+	sess := &Session{cacheSnap: cacheSnapshot{Valid: true, CacheReadTokens: 50000, SystemHash: 1}}
+
+	// No rewrite happened, so the baseline has to survive.
+	sess.emit(SessionEvent{Type: SEAutoCompactionEnd, CompactionChanged: false})
+	if got := sess.cacheSnap; !got.Valid || got.CacheReadTokens != 50000 {
+		t.Fatalf("unchanged compaction should preserve baseline, got %+v", got)
+	}
+
+	// The prefix was rewritten, so the next cache_read drop is expected.
+	sess.emit(SessionEvent{Type: SEAutoCompactionEnd, CompactionChanged: true})
+	if got := sess.cacheSnap; got.Valid || got.CacheReadTokens != 0 {
+		t.Fatalf("changed compaction should invalidate baseline, got %+v", got)
+	}
+}
@@ -106,6 +106,8 @@ type Session struct {
 	lazyPersist             bool
 	pendingUserMsg          []agentcore.Message
 	autoNamed               bool
+	lastAssistantStart      time.Time     // set at EventMessageStart (assistant), consumed at EventMessageEnd for latency_ms
+	cacheSnap               cacheSnapshot // previous turn's system/tools fingerprint + cache_read, updated after every LLM call
 	pendingToolCalls        map[string]pendingToolCall
 	recentToolCalls         []toolCallFingerprint
 	pendingReminderContinue bool
 
@@ -207,6 +207,12 @@ func (m *sessionPromptManager) rebuildPrompt() {
 	m.session.mu.Lock()
 	orderedSkills := skill.OrderForPrompt(m.session.skills, m.session.cwd, m.session.skillUsageScoresLocked())
 	m.session.staticReminders = config.BuildReminders(m.session.contextFiles, orderedSkills)
+	// Refresh cache-break fingerprints. CacheReadTokens / Valid are owned by
+	// persistLLMCall — only the input hashes are updated here, so a prompt
+	// rebuild mid-session leaves the "previous observed cache_read" intact
+	// and the next turn can still detect a drop.
+	m.session.cacheSnap.SystemHash = hashSystemBlocks(blocks)
+	m.session.cacheSnap.ToolsHash = hashTools(m.session.activeTools)
 	m.session.mu.Unlock()
 }
 
 
@@ -9,6 +9,7 @@ import (
 
 	"github.com/voocel/agentcore"
 	"github.com/voocel/codebot/internal/skill"
+	"github.com/voocel/codebot/internal/storage"
 )
 
 type sessionPersistence struct {
@@ -42,6 +43,14 @@ func (s *Session) Subscribe(fn func(SessionEvent)) func() {
 func (s *Session) handleAgentEvent(ev agentcore.Event) {
 	s.runtime.handleEvent(ev)
 
+	if ev.Type == agentcore.EventMessageStart {
+		if msg, ok := ev.Message.(agentcore.Message); ok && msg.Role == agentcore.RoleAssistant {
+			s.mu.Lock()
+			s.lastAssistantStart = time.Now()
+			s.mu.Unlock()
+		}
+	}
+
 	if ev.Type == agentcore.EventMessageEnd {
 		if msg, ok := ev.Message.(agentcore.Message); ok {
 			s.persistence.handleMessageEnd(msg)
@@ -96,6 +105,16 @@ func (s *Session) emit(ev SessionEvent) {
 		if ev.AgentEvent != nil && ev.AgentEvent.Type == agentcore.EventError {
 			s.recordErrorDiagnostic(ev.AgentEvent.Err)
 		}
+	case SEAutoCompactionEnd:
+		if ev.CompactionChanged {
+			// A compaction rewrites the prompt prefix: the next turn's
+			// cache_read drop is expected, not a bug. Invalidate the cache
+			// baseline so detectCacheBreak does not flag it as a break.
+			s.mu.Lock()
+			s.cacheSnap.CacheReadTokens = 0
+			s.cacheSnap.Valid = false
+			s.mu.Unlock()
+		}
 	}
 
 	s.mu.Lock()
@@ -238,10 +257,69 @@ func (p *sessionPersistence) handleMessageEnd(msg agentcore.Message) {
 	p.persistMessage(msg)
 
 	if msg.Role == agentcore.RoleAssistant {
+		p.persistLLMCall(msg)
 		p.tryAutoName()
 	}
 }
 
+// persistLLMCall records one observability entry for the assistant response
+// that just ended and refreshes the session's cache baseline. It is
+// best-effort: a failed write surfaces as SEError and never blocks the
+// session. Messages with no usage (nil, or Input/Output/TotalTokens all
+// zero) are skipped.
+//
+// NOTE(review): prevSnap and currSnap read their hashes from the same
+// session.cacheSnap, which rebuildPrompt overwrites in place, so they always
+// match here — confirm SystemChanged/ToolsChanged can ever be reported.
+func (p *sessionPersistence) persistLLMCall(msg agentcore.Message) {
+	u := msg.Usage
+	if u == nil || (u.Input == 0 && u.Output == 0 && u.TotalTokens == 0) {
+		return
+	}
+
+	// Read what the entry needs and roll the baseline under a single lock.
+	p.session.mu.Lock()
+	store, provider, model := p.session.store, p.session.provider, p.session.modelName
+	thinking := p.session.settings.ThinkingLevel
+	startedAt := p.session.lastAssistantStart
+	p.session.lastAssistantStart = time.Time{}
+	prevSnap := p.session.cacheSnap
+	currSnap := prevSnap // keeps the current SystemHash / ToolsHash
+	currSnap.CacheReadTokens = u.CacheRead
+	currSnap.Valid = true
+	p.session.cacheSnap = currSnap
+	p.session.mu.Unlock()
+
+	if store == nil {
+		return
+	}
+
+	// Latency stays 0 when no assistant start time was recorded.
+	var latencyMs int64
+	if !startedAt.IsZero() {
+		latencyMs = time.Since(startedAt).Milliseconds()
+	}
+	err := store.AppendLLMCall(storage.LLMCallEntry{
+		Provider:            provider,
+		Model:               model,
+		InputTokens:         u.Input,
+		OutputTokens:        u.Output,
+		CacheReadTokens:     u.CacheRead,
+		CacheCreationTokens: u.CacheWrite,
+		TotalTokens:         u.TotalTokens,
+		LatencyMs:           latencyMs,
+		StopReason:          string(msg.StopReason),
+		ThinkingLevel:       thinking,
+		CacheBreak:          detectCacheBreak(prevSnap, currSnap),
+	})
+	if err == nil {
+		return
+	}
+	p.session.emit(SessionEvent{
+		Type:  SEError,
+		Error: fmt.Errorf("persist llm_call: %w", err),
+	})
+}
+
 func (p *sessionPersistence) persistMessage(msg agentcore.Message) {
 	p.session.mu.Lock()
 	store := p.session.store