Skip to content

Commit da70628

Browse files
committed
feat: add SessionMemory-backed compaction
1 parent b8208ba commit da70628

8 files changed

Lines changed: 489 additions & 10 deletions

File tree

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ require (
88
github.com/charmbracelet/bubbletea v1.3.10
99
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
1010
github.com/muesli/reflow v0.3.0
11-
github.com/voocel/agentcore v1.6.1
11+
github.com/voocel/agentcore v1.6.3
1212
github.com/voocel/mcp-sdk-go v1.2.7
1313
gopkg.in/yaml.v3 v3.0.1
1414
)
@@ -37,7 +37,7 @@ require (
3737
github.com/muesli/termenv v0.16.0 // indirect
3838
github.com/rivo/uniseg v0.4.7 // indirect
3939
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect
40-
github.com/voocel/litellm v1.6.5 // indirect
40+
github.com/voocel/litellm v1.6.6 // indirect
4141
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
4242
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
4343
golang.org/x/image v0.38.0 // indirect

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEV
7070
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU=
7171
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
7272
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
73-
github.com/voocel/agentcore v1.6.1 h1:eUf+R4Wb0QrK6A/o3J0Uhkv+LXs+MmlUN1nBrXViroo=
74-
github.com/voocel/agentcore v1.6.1/go.mod h1:gG3cQIuoG5extUNWrmMmRov72yrqfBassNlU66SJSKc=
75-
github.com/voocel/litellm v1.6.5 h1:4UaZ0Br9P0+0cJvgrj/V0OBUWi8ILwfSFCDrd0GJbLk=
76-
github.com/voocel/litellm v1.6.5/go.mod h1:6MBUu3I4DHm7h72Vl+3nqLruSwYmgqMf/I9BGoordJ4=
73+
github.com/voocel/agentcore v1.6.3 h1:vmFg0LmzEXuwSbVLpb2JxdhuM8qfP3saJP8+aD0dKgk=
74+
github.com/voocel/agentcore v1.6.3/go.mod h1:lbkVVDVg3/0JlUsuQYcQFTEPP0US+ZAoFV58dT4O3qg=
75+
github.com/voocel/litellm v1.6.6 h1:c0hmeQlBspz/Ic+W9BRvu13z1L/Zfq/Ec5B/8dD2KaQ=
76+
github.com/voocel/litellm v1.6.6/go.mod h1:6MBUu3I4DHm7h72Vl+3nqLruSwYmgqMf/I9BGoordJ4=
7777
github.com/voocel/mcp-sdk-go v1.2.7 h1:R1AF8HSUjHmwWc17kjmzEJKQzYEeP3XMh3n7Kr6Yvro=
7878
github.com/voocel/mcp-sdk-go v1.2.7/go.mod h1:WWxi9LOXowHDF9UqO1WSs49iScWnr7lRZV+QxDkU1GM=
7979
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=

internal/agent/session.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,9 @@ type Session struct {
106106
lazyPersist bool
107107
pendingUserMsg []agentcore.Message
108108
autoNamed bool
109-
lastAssistantStart time.Time // set at EventMessageStart (assistant), consumed at EventMessageEnd for latency_ms
110-
cacheSnap cacheSnapshot // previous turn's system/tools fingerprint + cache_read, updated after every LLM call
109+
lastAssistantStart time.Time // set at EventMessageStart (assistant), consumed at EventMessageEnd for latency_ms
110+
cacheSnap cacheSnapshot // previous turn's system/tools fingerprint + cache_read, updated after every LLM call
111+
sessionMemory sessionMemoryState // background extraction bookkeeping — see session_memory.go
111112
pendingToolCalls map[string]pendingToolCall
112113
recentToolCalls []toolCallFingerprint
113114
pendingReminderContinue bool

internal/agent/session_memory.go

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
package agent
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"path/filepath"
8+
"strings"
9+
"sync"
10+
"time"
11+
12+
"github.com/voocel/agentcore"
13+
agentctx "github.com/voocel/agentcore/context"
14+
"github.com/voocel/codebot/internal/config"
15+
)
16+
17+
// SessionMemory is a project-scoped, markdown-formatted running summary of the
18+
// user's collaboration with the agent. It is updated in the background after
19+
// high-signal turns so that autoCompact (future work) and session resume can
20+
// inherit accumulated context without re-summarizing the full transcript.
21+
//
22+
// Design intentionally mirrors Claude Code v2.1.88's SessionMemory
23+
// (services/SessionMemory/sessionMemory.ts) — same thresholds, same template
24+
// anatomy, same stale-detection heuristic — so that behavior is predictable
25+
// for anyone familiar with CC's prompt-caching discipline.
26+
27+
const (
	// sessionMemoryInitTokens is the estimated conversation size a session
	// must reach before the first extraction is attempted; anything shorter
	// is not worth summarizing. Same value as CC's
	// minimumMessageTokensToInit (10_000).
	sessionMemoryInitTokens = 10_000

	// sessionMemoryUpdateTokens is how much the conversation must grow, in
	// tokens, since the previous extraction before another one is started.
	// Same value as CC's minimumTokensBetweenUpdate (5_000).
	sessionMemoryUpdateTokens = 5_000

	// sessionMemoryExtractionTimeout is the deadline for a single
	// extraction's model call. The ctx handed to ephemeralQuery is cancelled
	// once it elapses, after which the worker's defer resets
	// extractionStartedAt so a later extraction can be scheduled.
	sessionMemoryExtractionTimeout = time.Minute + 30*time.Second

	// sessionMemoryStaleAfter is the age beyond which a held extraction slot
	// is treated as dead (leaked goroutine, disk I/O stuck past the ctx
	// cancel, etc.) and reclaimed by the gate. It has to stay above
	// sessionMemoryExtractionTimeout with a safety margin: otherwise the gate
	// could free the slot while the original goroutine is still inside
	// model.Generate, and ChatModel does not promise to tolerate concurrent
	// calls (see agentcore/context/summary.go).
	sessionMemoryStaleAfter = sessionMemoryExtractionTimeout * 2

	// sessionMemoryMaxExtractionTokens limits the size of the model response
	// we accept. A filled-in template is around 1k tokens, so 3k leaves
	// generous headroom while remaining much cheaper than re-running
	// autoCompact.
	sessionMemoryMaxExtractionTokens = 3_000
)
55+
56+
// sessionMemoryTemplate is the initial scaffold used as the memory body on an
// extraction when no memory file exists yet (or the existing one is blank).
// Section headers are what the model reads and updates in place; the italic
// (underscore-wrapped) line under each header is guidance for the model, not
// content. SessionMemorySeedFn compares the on-disk body against this text
// to detect a memory that has not been filled in yet, so changing the
// template changes that detection.
const sessionMemoryTemplate = `# Session Memory

## Current State
_In-progress work and its status. One paragraph._

## Task Specification
_Goals, constraints, acceptance criteria, user preferences._

## Files and Functions
_Paths touched, with a one-line purpose each. Group by directory._

## Workflow
_Ongoing multi-step tactic (if any). Omit if nothing is mid-flight._

## Errors & Corrections
_What went wrong, what fix landed, what to avoid next time._

## Learnings
_Non-obvious facts about the codebase, the user's habits, project idioms._

## Worklog
_Chronological bullet list of high-signal turns. Most recent last._
`
82+
83+
// sessionMemoryUpdatePrompt asks the model to re-emit the memory body with new
// information folded in. We deliberately include the previous content so the
// model can preserve structure and only update what changed — matching CC's
// incremental update pattern.
//
// The string is a fmt format with exactly one %s verb, which
// runSessionMemoryExtraction fills with the current memory body (or the
// template when there is none).
const sessionMemoryUpdatePrompt = `You maintain a running session memory for this coding collaboration. Update the memory below to reflect the conversation so far.

RULES:
- Preserve every section header exactly, even if empty.
- Replace italic guidance lines with real content as sections get populated.
- Do not invent facts. If a section has no information, leave its guidance line.
- Keep each section concise (aim for 200 words max per section).
- Reply with ONLY the updated markdown body. No preamble, no code fences, no commentary.

<current-session-memory>
%s
</current-session-memory>`
99+
100+
// sessionMemoryState tracks extraction bookkeeping for a single session.
// It is held as a field of Session (not module-global) so multiple sessions
// in the same process do not cross-contaminate — a detail CC dodges because
// its extraction lives on module state, but Go servers/tests run concurrently.
//
// mu guards every other field. The struct contains a sync.Mutex and must
// therefore not be copied once in use.
type sessionMemoryState struct {
	mu sync.Mutex

	initialized         bool      // set once an extraction has been saved and committed
	tokensAtLast        int       // conversation token estimate recorded by the last committed extraction
	extractionStartedAt time.Time // zero when idle
}
111+
112+
// SessionMemory is the in-memory view of a memory file. On disk the file
// holds only the body as plain markdown (no frontmatter) so it is readable
// and editable by a human. UpdatedAt comes from the filesystem mtime, not a
// stored field.
type SessionMemory struct {
	Content   string    // markdown body as read from the file
	UpdatedAt time.Time // file modification time; zero if it could not be determined
}
119+
120+
// loadSessionMemory reads the on-disk memory for this session's project. It
121+
// returns (nil, nil) when the file does not exist — callers should fall back
122+
// to the default template.
123+
func (s *Session) loadSessionMemory() (*SessionMemory, error) {
124+
path := config.SessionMemoryPath(s.cwd)
125+
data, err := os.ReadFile(path)
126+
if err != nil {
127+
if os.IsNotExist(err) {
128+
return nil, nil
129+
}
130+
return nil, fmt.Errorf("read session memory: %w", err)
131+
}
132+
return &SessionMemory{Content: string(data), UpdatedAt: fileModTime(path)}, nil
133+
}
134+
135+
// saveSessionMemory writes the memory atomically. A temp file + rename keeps
136+
// a concurrent reader from seeing a half-written file.
137+
func (s *Session) saveSessionMemory(content string) error {
138+
path := config.SessionMemoryPath(s.cwd)
139+
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
140+
return fmt.Errorf("mkdir session memory: %w", err)
141+
}
142+
tmp := path + ".tmp"
143+
if err := os.WriteFile(tmp, []byte(content), 0o644); err != nil {
144+
return fmt.Errorf("write tmp: %w", err)
145+
}
146+
if err := os.Rename(tmp, path); err != nil {
147+
_ = os.Remove(tmp)
148+
return fmt.Errorf("rename: %w", err)
149+
}
150+
return nil
151+
}
152+
153+
// fileModTime returns the modification time of the file at path, or the zero
// time.Time when the file cannot be stat'ed.
func fileModTime(path string) time.Time {
	if info, err := os.Stat(path); err == nil {
		return info.ModTime()
	}
	return time.Time{}
}
160+
161+
// maybeExtractSessionMemory is the gate that decides whether to kick off a
162+
// background extraction. Called after every assistant message_end. The three
163+
// guard conditions are:
164+
// 1. an extraction is not already in-flight (stale-aware);
165+
// 2. the session has crossed the init or update token threshold;
166+
// 3. the last message is safe to summarize up to (no pending tool_use).
167+
//
168+
// All decisions are cheap — the actual model call happens on a goroutine.
169+
func (p *sessionPersistence) maybeExtractSessionMemory() {
170+
s := p.session
171+
172+
msgs := s.agent.Messages()
173+
if !isSafeSummaryBoundary(msgs) {
174+
// Trailing assistant tool_use awaiting results — postpone.
175+
return
176+
}
177+
178+
total := agentctx.EstimateTotal(msgs)
179+
180+
s.sessionMemory.mu.Lock()
181+
// Stale cleanup: if a prior extraction started but never completed,
182+
// release the lock so we can try again.
183+
if !s.sessionMemory.extractionStartedAt.IsZero() &&
184+
time.Since(s.sessionMemory.extractionStartedAt) > sessionMemoryStaleAfter {
185+
s.sessionMemory.extractionStartedAt = time.Time{}
186+
}
187+
if !s.sessionMemory.extractionStartedAt.IsZero() {
188+
s.sessionMemory.mu.Unlock()
189+
return
190+
}
191+
192+
initNeeded := !s.sessionMemory.initialized && total >= sessionMemoryInitTokens
193+
delta := total - s.sessionMemory.tokensAtLast
194+
updateNeeded := s.sessionMemory.initialized && delta >= sessionMemoryUpdateTokens
195+
if !initNeeded && !updateNeeded {
196+
s.sessionMemory.mu.Unlock()
197+
return
198+
}
199+
200+
s.sessionMemory.extractionStartedAt = time.Now()
201+
gen := s.generation
202+
s.sessionMemory.mu.Unlock()
203+
204+
go s.runSessionMemoryExtraction(gen, total)
205+
}
206+
207+
// runSessionMemoryExtraction is the background worker. It must only mutate
208+
// sessionMemoryState through the guarded helpers below so that a stale
209+
// goroutine from a swapped-out session cannot corrupt the current one.
210+
func (s *Session) runSessionMemoryExtraction(gen uint64, tokens int) {
211+
defer func() {
212+
s.sessionMemory.mu.Lock()
213+
s.sessionMemory.extractionStartedAt = time.Time{}
214+
s.sessionMemory.mu.Unlock()
215+
}()
216+
217+
// Bail if the session was switched / closed while we were queued.
218+
if !s.matchesGeneration(gen) {
219+
return
220+
}
221+
222+
ctx, cancel := context.WithTimeout(context.Background(), sessionMemoryExtractionTimeout)
223+
defer cancel()
224+
225+
existing, _ := s.loadSessionMemory()
226+
body := sessionMemoryTemplate
227+
if existing != nil && strings.TrimSpace(existing.Content) != "" {
228+
body = existing.Content
229+
}
230+
231+
prompt := fmt.Sprintf(sessionMemoryUpdatePrompt, body)
232+
resp, err := s.ephemeralQuery(ctx, prompt,
233+
agentcore.WithMaxTokens(sessionMemoryMaxExtractionTokens),
234+
)
235+
if err != nil {
236+
s.emit(SessionEvent{Type: SEError, Error: fmt.Errorf("session memory extract: %w", err)})
237+
return
238+
}
239+
240+
content := strings.TrimSpace(resp.Message.TextContent())
241+
if content == "" {
242+
return
243+
}
244+
content = stripCodeFence(content)
245+
246+
if err := s.saveSessionMemory(content); err != nil {
247+
s.emit(SessionEvent{Type: SEError, Error: fmt.Errorf("session memory save: %w", err)})
248+
return
249+
}
250+
251+
// Re-check generation before committing state so a late-landing goroutine
252+
// doesn't overwrite a fresh session's bookkeeping.
253+
if !s.matchesGeneration(gen) {
254+
return
255+
}
256+
s.sessionMemory.mu.Lock()
257+
s.sessionMemory.initialized = true
258+
s.sessionMemory.tokensAtLast = tokens
259+
s.sessionMemory.mu.Unlock()
260+
}
261+
262+
// matchesGeneration reports whether the session has not been switched since
263+
// the background goroutine was dispatched.
264+
func (s *Session) matchesGeneration(gen uint64) bool {
265+
s.mu.Lock()
266+
defer s.mu.Unlock()
267+
return s.generation == gen
268+
}
269+
270+
// isSafeSummaryBoundary reports whether the current tail of the conversation
271+
// is safe to summarize up to. A trailing assistant message carrying tool_use
272+
// calls whose results have not landed yet would be orphaned by an extraction
273+
// cut there, so we postpone until the tool_result turn lands.
274+
func isSafeSummaryBoundary(msgs []agentcore.AgentMessage) bool {
275+
if len(msgs) == 0 {
276+
return false
277+
}
278+
last := msgs[len(msgs)-1]
279+
if last.GetRole() == agentcore.RoleAssistant && last.HasToolCalls() {
280+
return false
281+
}
282+
return true
283+
}
284+
285+
// SessionMemorySeedFn returns a closure suitable for plugging into
286+
// agentcore's SessionMemoryStrategy. The closure reads the project-scoped
287+
// memory file on each compaction attempt and returns the empty string when
288+
// the file is missing or still matches the initial template — both of which
289+
// signal "no useful memory yet, fall through to LLM summarization".
290+
func SessionMemorySeedFn(cwd string) func() (string, error) {
291+
return func() (string, error) {
292+
data, err := os.ReadFile(config.SessionMemoryPath(cwd))
293+
if err != nil {
294+
if os.IsNotExist(err) {
295+
return "", nil
296+
}
297+
return "", err
298+
}
299+
body := strings.TrimSpace(string(data))
300+
if body == "" {
301+
return "", nil
302+
}
303+
if body == strings.TrimSpace(sessionMemoryTemplate) {
304+
return "", nil
305+
}
306+
return body, nil
307+
}
308+
}
309+
310+
// stripCodeFence undoes a ```...``` wrapper around the model's output — a
// common failure mode despite the "no code fences" instruction.
//
// Input that does not start with ``` (after trimming whitespace) is returned
// trimmed but otherwise untouched. Otherwise the whole first line (the fence
// plus any language tag) is dropped when a newline exists, one trailing ```
// is removed if present, and the remainder is trimmed.
func stripCodeFence(s string) string {
	trimmed := strings.TrimSpace(s)
	if !strings.HasPrefix(trimmed, "```") {
		return trimmed
	}
	// Discard everything up to and including the first newline; with no
	// newline there is no separate opening line to discard.
	if _, rest, hasNewline := strings.Cut(trimmed, "\n"); hasNewline {
		trimmed = rest
	}
	return strings.TrimSpace(strings.TrimSuffix(trimmed, "```"))
}

0 commit comments

Comments
 (0)