perf: stop deferring core tools

voocel · voocel · commit a1fb7b4891b2 · 2026-04-16T10:55:38.000-04:00
diff --git a/internal/bootstrap/assemble_session.go b/internal/bootstrap/assemble_session.go
@@ -173,9 +173,31 @@ func buildHookSupport(input *resolvedInput, services *bootServices, settings con
 }
 
 // coreToolNames are tools that remain always visible to the LLM.
-// When tool search is enabled, all tools except tool_search itself are deferred.
-// tool_search is added separately and never appears in the deferred set.
-var coreToolNames = map[string]bool{}
+// Tools NOT in this set are deferred behind tool_search when the model
+// supports it. Deferral is the default and always-visible is opt-in:
+// frequently used core tools are listed here so the model can call them on
+// turn 1 without a tool_search round-trip; rarely used or schema-heavy
+// tools stay deferred to keep the base prompt compact.
+var coreToolNames = map[string]bool{
+	// Filesystem + shell — used in virtually every turn.
+	"read":  true,
+	"write": true,
+	"edit":  true,
+	"bash":  true,
+	"grep":  true,
+	"glob":  true,
+	"ls":    true,
+	// Task management — if present, should be immediately callable
+	// (the system prompt tells the model to use them proactively).
+	"task_create": true,
+	"task_update": true,
+	"task_list":   true,
+	"task_get":    true,
+	// Interaction / plan mode — turn-1 UX primitives.
+	"ask_user":        true,
+	"enter_plan_mode": true,
+	"exit_plan_mode":  true,
+}
 
 // supportsToolSearch reports whether the given provider/model combination
 // supports deferred tool search. Currently only Claude models and GPT-5.4+
diff --git a/internal/config/prompt.go b/internal/config/prompt.go
@@ -74,11 +74,47 @@ Break down and manage your work with task_create, task_update, and task_list.
 - Check task_list before creating more tasks if a relevant task may already exist
 - After completing a task, call task_list to find the next pending or unblocked task`
 	}
+	doingTasksInstructions := `## Doing tasks
+- The user will primarily request you to perform software engineering tasks. These may include solving bugs, adding new functionality, refactoring code, explaining code, and more. When given an unclear or generic instruction, consider it in the context of these software engineering tasks and the current working directory. For example, if the user asks you to change "methodName" to snake case, do not reply with just "method_name", instead find the method in the code and modify the code.
+- You are highly capable and often allow users to complete ambitious tasks that would otherwise be too complex or take too long. You should defer to user judgement about whether a task is too large to attempt.
+- In general, do not propose changes to code you haven't read. If a user asks about or wants you to modify a file, read it first. Understand existing code before suggesting modifications.
+- Do not create files unless they're absolutely necessary for achieving your goal. Generally prefer editing an existing file to creating a new one, as this prevents file bloat and builds on existing work more effectively.
+- Avoid giving time estimates or predictions for how long tasks will take, whether for your own work or for users planning projects. Focus on what needs to be done, not how long it might take.
+- If an approach fails, diagnose why before switching tactics—read the error, check your assumptions, try a focused fix. Don't retry the identical action blindly, but don't abandon a viable approach after a single failure either. Escalate to the user with ask_user only when you're genuinely stuck after investigation, not as a first response to friction.
+- Be careful not to introduce security vulnerabilities such as command injection, XSS, SQL injection, and other OWASP top 10 vulnerabilities. If you notice that you wrote insecure code, immediately fix it. Prioritize writing safe, secure, and correct code.
+- Don't add features, refactor code, or make "improvements" beyond what was asked. A bug fix doesn't need surrounding code cleaned up. A simple feature doesn't need extra configurability. Don't add docstrings, comments, or type annotations to code you didn't change. Only add comments where the logic isn't self-evident.
+- Don't add error handling, fallbacks, or validation for scenarios that can't happen. Trust internal code and framework guarantees. Only validate at system boundaries (user input, external APIs). Don't use feature flags or backwards-compatibility shims when you can just change the code.
+- Don't create helpers, utilities, or abstractions for one-time operations. Don't design for hypothetical future requirements. The right amount of complexity is what the task actually requires—no speculative abstractions, but no half-finished implementations either. Three similar lines of code is better than a premature abstraction.
+- Avoid backwards-compatibility hacks like renaming unused _vars, re-exporting types, adding // removed comments for removed code, etc. If you are certain that something is unused, you can delete it completely.`
+
+	usingYourToolsInstructions := `## Using your tools
+- Do NOT use bash to run commands when a relevant dedicated tool is provided. Using dedicated tools allows the user to better understand and review your work. This is CRITICAL to assisting the user:
+  - To read files use read instead of cat, head, tail, or sed
+  - To edit files use edit instead of sed or awk
+  - To create files use write instead of cat with heredoc or echo redirection
+  - To search for files use glob instead of find or ls
+  - To search the content of files, use grep instead of grep or rg
+  - Reserve using bash exclusively for system commands and terminal operations that require shell execution. If you are unsure and there is a relevant dedicated tool, default to using the dedicated tool and only fallback on using bash for these if it is absolutely necessary.
+- You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. For instance, if one operation must complete before another starts, run these operations sequentially instead.`
+
+	outputEfficiencyInstructions := `## Output efficiency
+
+IMPORTANT: Go straight to the point. Try the simplest approach first without going in circles. Do not overdo it. Be extra concise.
+
+Keep your text output brief and direct. Lead with the answer or action, not the reasoning. Skip filler words, preamble, and unnecessary transitions. Do not restate what the user said — just do it. When explaining, include only what is necessary for the user to understand.
+
+Focus text output on:
+- Decisions that need the user's input
+- High-level status updates at natural milestones
+- Errors or blockers that change the plan
+
+If you can say it in one sentence, don't use three. Prefer short, direct sentences over long explanations. This does not apply to code or tool calls.`
 	autoMemoryInstructions := BuildAutoMemoryInstructions(ctx.MemoryDir)
 	var instructionParts []string
 	if toolsBody.Len() > 0 {
 		instructionParts = append(instructionParts, toolsBody.String())
 	}
+	instructionParts = append(instructionParts, doingTasksInstructions, usingYourToolsInstructions, outputEfficiencyInstructions)
 	if taskManagementInstructions != "" {
 		instructionParts = append(instructionParts, taskManagementInstructions)
 	}
diff --git a/internal/config/prompt_test.go b/internal/config/prompt_test.go
@@ -43,6 +43,30 @@ func TestBuildSystemBlockTextsNoTools(t *testing.T) {
 	}
 }
 
+// TestBuildSystemBlockTextsIncludesDoingTasksGuardrails checks that every guardrail marker appears in the instructions text.
+func TestBuildSystemBlockTextsIncludesDoingTasksGuardrails(t *testing.T) {
+	t.Parallel()
+	wantMarkers := []string{
+		"## Doing tasks",
+		`"improvements" beyond what was asked`,
+		"scenarios that can't happen",
+		"premature abstraction",
+		"diagnose why before switching tactics",
+		"OWASP top 10",
+		"backwards-compatibility hacks",
+		"## Using your tools",
+		"Maximize use of parallel tool calls",
+		"## Output efficiency",
+		"Go straight to the point",
+	}
+	_, got := BuildSystemBlockTexts("/tmp/ws", ContextFiles{}, []ToolInfo{{Name: "read"}})
+	for _, want := range wantMarkers {
+		if !strings.Contains(got, want) {
+			t.Errorf("instructions missing guardrail %q", want)
+		}
+	}
+}
+
 func TestBuildSystemBlockTextsAddsTaskManagementSection(t *testing.T) {
 	t.Parallel()
 
diff --git a/internal/ui/app.go b/internal/ui/app.go
@@ -433,6 +433,6 @@ func (a *App) execShell(cmd string) tea.Cmd {
 		} else if err != nil {
 			result = err.Error()
 		}
-		return tui.CommandResultMsg{Text: tui.CommandStyle.Render(result)}
+		return tui.CommandResultMsg{Text: tui.CommandStyle.Render(result), Inline: true}
 	}
 }
diff --git a/internal/ui/tui/events.go b/internal/ui/tui/events.go
@@ -10,13 +10,31 @@ import (
 	"github.com/voocel/agentcore"
 )
 
+// formatScrollbackBlock normalizes one block of scrollback output. Trailing
+// newlines are always removed; unless inline is set, a single "\n" is
+// prepended so the block is separated from whatever was printed before it.
+// It is a pure string helper shared by the print helpers and the tests.
+func formatScrollbackBlock(content string, inline bool) string {
+	trimmed := strings.TrimRight(content, "\n")
+	if !inline {
+		trimmed = "\n" + trimmed
+	}
+	return trimmed
+}
+
 // printBlock prints content to terminal scrollback with a leading blank line.
 // Every top-level output block (assistant reply, tool result, error) should
 // use this instead of raw tea.Println so blocks are visually separated by
-// exactly one blank line. Trailing newlines are stripped so that spacing
-// between blocks is always consistent regardless of content construction.
+// exactly one blank line; formatScrollbackBlock also strips trailing newlines.
 func printBlock(content string) tea.Cmd {
-	return tea.Println("\n" + strings.TrimRight(content, "\n"))
+	return tea.Println(formatScrollbackBlock(content, false))
+}
+
+// printInline prints content flush against the previous block: no leading
+// blank line is added, and trailing newlines are stripped. Use for output that
+// continues what came before — e.g. shell command output under its echoed prompt.
+func printInline(content string) tea.Cmd {
+	return tea.Println(formatScrollbackBlock(content, true))
 }
 
 // HandleAgentEvent processes agent events.
diff --git a/internal/ui/tui/messages.go b/internal/ui/tui/messages.go
@@ -13,9 +13,13 @@ type AgentEventMsg struct {
 
 // CommandResultMsg carries the result of a slash command back to the model.
 type CommandResultMsg struct {
-	Text        string
-	Quit        bool   // true for /exit
-	Clear       bool   // true for /clear
+	Text string
+	// Inline prints the result flush against the previous scrollback block
+	// (no leading blank line). Use for output that should feel like a direct
+	// continuation — e.g. shell command output under its echoed prompt.
+	Inline           bool
+	Quit             bool   // true for /exit
+	Clear            bool   // true for /clear
 	NewProvider      string // non-empty if provider was switched
 	NewModel         string // non-empty if model was switched
 	NewContextWindow int    // non-zero if context window changed
diff --git a/internal/ui/tui/model_test.go b/internal/ui/tui/model_test.go
@@ -174,6 +174,30 @@ func TestHandleCommandResultUpdatesProviderAndModel(t *testing.T) {
 	}
 }
 
+func TestFormatScrollbackBlock(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		content string
+		inline  bool
+		want    string
+	}{
+		{name: "block adds leading blank line", content: "  ok", inline: false, want: "\n  ok"},
+		{name: "inline stays flush", content: "  ok", inline: true, want: "  ok"},
+		{name: "block strips trailing newlines", content: "hello\n\n", inline: false, want: "\nhello"},
+		{name: "inline strips trailing newlines", content: "hello\n\n", inline: true, want: "hello"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			if got := formatScrollbackBlock(tc.content, tc.inline); got != tc.want {
+				t.Fatalf("formatScrollbackBlock(%q, %v) = %q, want %q", tc.content, tc.inline, got, tc.want)
+			}
+		})
+	}
+}
+
 func TestOverlayAppearsBelowInput(t *testing.T) {
 	m := New(nil, "anthropic/claude-sonnet-4.6", Config{
 		Overlay: func(*Model) *OverlayState {
diff --git a/internal/ui/tui/render.go b/internal/ui/tui/render.go
@@ -203,7 +203,11 @@ func (m *Model) renderInputPanel() string {
 	sections = append(sections, inputView)
 
 	content := strings.Join(sections, "\n\n")
-	return InputPanelStyle.Width(max(width-2, 20)).Render(content)
+	panelStyle := InputPanelStyle
+	if m.shellInputActive() {
+		panelStyle = ShellInputPanelStyle
+	}
+	return panelStyle.Width(max(width-2, 20)).Render(content)
 }
 
 // RenderStatusBar renders the status line above the input.
diff --git a/internal/ui/tui/render_test.go b/internal/ui/tui/render_test.go
@@ -63,6 +63,28 @@ func TestRenderContextBarShowsModeIndicator(t *testing.T) {
 	}
 }
 
+// TestRenderInputPanelHighlightsShellMode expects "!"-prefixed input to activate shell input mode.
+func TestRenderInputPanelHighlightsShellMode(t *testing.T) {
+	model := New(nil, "anthropic/claude-sonnet-4.6")
+	model.Ready, model.Width = true, 80
+	model.Input.SetValue("!git status")
+
+	if active := model.shellInputActive(); !active {
+		t.Fatal("expected shell input mode to activate for !-prefixed input")
+	}
+}
+
+// TestRenderInputPanelUsesDefaultStyleWithoutShellPrefix expects plain input to leave shell input mode off.
+func TestRenderInputPanelUsesDefaultStyleWithoutShellPrefix(t *testing.T) {
+	model := New(nil, "anthropic/claude-sonnet-4.6")
+	model.Ready, model.Width = true, 80
+	model.Input.SetValue("git status")
+
+	if active := model.shellInputActive(); active {
+		t.Fatal("did not expect shell input mode without ! prefix")
+	}
+}
+
 func TestIndentBlock(t *testing.T) {
 	cases := []struct {
 		name   string
diff --git a/internal/ui/tui/styles.go b/internal/ui/tui/styles.go
@@ -207,6 +207,21 @@ var (
 			BorderForeground(ColorInputChrome).
 			Padding(0, 1)
 
+	ShellInputPanelStyle = lipgloss.NewStyle().
+			Border(lipgloss.Border{Top: "─", Bottom: "─"}).
+			BorderTop(true).
+			BorderBottom(true).
+			BorderLeft(false).
+			BorderRight(false).
+			BorderForeground(ColorShell).
+			Padding(0, 1)
+
+	// ShellAccentStyle is used for both the prompt caret ("❯") and the "!" prefix
+	// when the input is in shell mode — they share the same foreground/weight by design.
+	ShellAccentStyle = lipgloss.NewStyle().
+				Foreground(ColorShell).
+				Bold(true)
+
 	InputHintStyle = lipgloss.NewStyle().
 			Foreground(ColorMuted)
 
diff --git a/internal/ui/tui/update.go b/internal/ui/tui/update.go
@@ -602,6 +602,9 @@ func (m *Model) handleCommandResult(msg CommandResultMsg) (tea.Model, tea.Cmd) {
 			m.ShowWelcome = false
 		}
 		output += indentBlock(msg.Text, 2)
+		if msg.Inline {
+			return m, printInline(output)
+		}
 		return m, printBlock(output)
 	}
 	return m, nil
diff --git a/internal/ui/tui/view.go b/internal/ui/tui/view.go
@@ -107,6 +107,10 @@ func (m *Model) RenderPromptOutput(text string) string {
 	return userLine
 }
 
+// shellInputActive reports whether the current input, ignoring surrounding
+// whitespace, begins with the "!" shell prefix.
+func (m *Model) shellInputActive() bool { return strings.HasPrefix(strings.TrimSpace(m.Input.Value()), "!") }
+
 // overlayView returns the rendered overlay content and whether it replaces the input area.
 func (m *Model) overlayView() (string, bool) {
 	if m.config.Overlay == nil {
@@ -131,6 +135,11 @@ func (m *Model) renderCompletions() string {
 // When cmdHighlight is set, the command text in the view is colorized.
 func (m *Model) styledInputView() string {
 	view := m.Input.View()
+	if m.shellInputActive() {
+		view = strings.Replace(view, "❯", ShellAccentStyle.Render("❯"), 1)
+		view = strings.Replace(view, "!", ShellAccentStyle.Render("!"), 1)
+		return view
+	}
 	if m.cmdHighlight == "" {
 		return view
 	}

Original file line number	Diff line number	Diff line change
`@@ -433,6 +433,6 @@ func (a *App) execShell(cmd string) tea.Cmd {`
`433`	`433`	`} else if err != nil {`
`434`	`434`	`result = err.Error()`
`435`	`435`	`}`
`436`		`- return tui.CommandResultMsg{Text: tui.CommandStyle.Render(result)}`
	`436`	`+ return tui.CommandResultMsg{Text: tui.CommandStyle.Render(result), Inline: true}`
`437`	`437`	`}`
`438`	`438`	`}`