Skip to content

Commit 4b58bff

Browse files
JAORMX and claude committed
Add guest init binary, hooks, and wiring for go-microvm runtime
The go-microvm runtime skeleton was solid but couldn't boot MCP server images because it lacked guest-side infrastructure. go-microvm uses gvproxy networking (virtio-net), so the guest must configure its own network via DHCP — libkrun's built-in init doesn't do this.

This adds:
- thv-vm-init: PID 1 guest binary that boots (mounts, DHCP, SSH via go-microvm/guest/boot), reads /etc/thv-entrypoint.json for the original OCI cmd/env/workdir, starts the MCP server as a child process, forwards signals, and halts the VM cleanly on exit
- RootFS hooks: InjectInitBinary (embeds compiled binary), InjectEntrypoint (captures OCI config before init override), InjectEntrypointOverride (explicit command), InjectSSHKeys
- libkrun backend with WithUserNamespaceUID(1000,1000) for virtiofs passthrough, WithCleanDataDir, WithLogLevel, WithImageCache
- HTTP readiness probe (exponential backoff, accepts any response)
- Recovered VM lifecycle fix: StopWorkload/RemoveWorkload now kill orphaned runner processes for VMs without handles
- Build infrastructure: build-vm-init task with CGO_ENABLED=0 GOOS=linux, wired as a dependency of the main build task

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 074326e commit 4b58bff

31 files changed

Lines changed: 2898 additions & 2 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,6 @@ coverage*
4747
crd-helm-wrapper
4848
cmd/vmcp/__debug_bin*
4949
/vmcp
50+
51+
# Embedded VM init binary (built by task build-vm-init)
52+
pkg/container/gomicrovm/initbin/thv-vm-init

Taskfile.yml

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,47 @@
11
version: '3'
22

3+
vars:
4+
GOMICROVM_VERSION:
5+
sh: go list -m github.com/stacklok/go-microvm | awk '{print $2}'
6+
37
includes:
48
operator:
59
taskfile: ./cmd/thv-operator/Taskfile.yml
610
flatten: true
711

812
tasks:
13+
fetch-runtime:
  desc: Download pre-built go-microvm runtime (go-microvm-runner) from GitHub Release
  platforms: [linux, darwin]
  # The task is considered up to date as soon as the runner binary exists.
  status:
    - test -f bin/go-microvm-runner
  cmds:
    - mkdir -p bin
    - |
      # A missing gh CLI is not an error: print manual instructions and succeed.
      if ! command -v gh >/dev/null 2>&1; then
        echo "NOTE: gh CLI not found -- skipping go-microvm-runner download."
        echo " Install gh (https://cli.github.com) and run 'task fetch-runtime',"
        echo " or download go-microvm-runner manually from:"
        echo " https://github.com/stacklok/go-microvm/releases/tag/{{.GOMICROVM_VERSION}}"
        exit 0
      fi

      # Build the release asset name from the host OS and CPU architecture.
      # Only x86_64 and aarch64 are renamed; any other `uname -m` value is
      # used unchanged.
      os="$(uname -s | tr '[:upper:]' '[:lower:]')"
      arch="$(uname -m)"
      if [ "$arch" = "x86_64" ]; then
        arch="amd64"
      elif [ "$arch" = "aarch64" ]; then
        arch="arm64"
      fi
      archive="go-microvm-runtime-${os}-${arch}.tar.gz"

      # Download the archive for the go-microvm version pinned in go.mod,
      # unpack it into bin/ without its top-level directory, then remove it.
      echo "Downloading go-microvm runtime {{.GOMICROVM_VERSION}} (${os}/${arch})..."
      gh release download {{.GOMICROVM_VERSION}} \
        --repo stacklok/go-microvm \
        --dir bin/ \
        --clobber \
        --pattern "${archive}"
      tar -xzf "bin/${archive}" -C bin/ --strip-components=1
      rm -f "bin/${archive}"
      echo "Installed bin/go-microvm-runner"
44+
945
docs:
1046
desc: Regenerate the docs
1147
deps: [swagger-install, helm-docs]
@@ -202,9 +238,23 @@ tasks:
202238
desc: Run all tests (unit, integration, and e2e)
203239
deps: [test, test-integration, test-e2e]
204240

241+
build-vm-init:
  desc: Cross-compile the thv-vm-init guest binary for Linux
  # Inputs that decide whether the binary must be rebuilt. go.mod and go.sum
  # are listed alongside the Go sources so that bumping a dependency (for
  # example go-microvm, whose guest packages are compiled into this binary)
  # does not leave a stale binary in place.
  sources:
    - cmd/thv-vm-init/**/*.go
    - go.mod
    - go.sum
  generates:
    - pkg/container/gomicrovm/initbin/thv-vm-init
  env:
    CGO_ENABLED: "0"
    GOOS: linux
    # GOARCH is intentionally not set — it inherits the host architecture.
    # libkrun runs guest VMs with the same arch as the host (no cross-arch).
  cmds:
    - go build -ldflags "-s -w" -o pkg/container/gomicrovm/initbin/thv-vm-init ./cmd/thv-vm-init
254+
205255
build:
206256
desc: Build the binary
207-
deps: [gen]
257+
deps: [gen, build-vm-init, fetch-runtime]
208258
vars:
209259
VERSION:
210260
sh: git describe --tags --dirty --match "v*" 2>/dev/null || echo "dev"
@@ -224,6 +274,7 @@ tasks:
224274

225275
install:
226276
desc: Install the thv binary to GOPATH/bin
277+
deps: [build-vm-init, fetch-runtime]
227278
vars:
228279
VERSION:
229280
sh: git describe --tags --dirty --match "v*" 2>/dev/null || echo "dev"
@@ -232,6 +283,13 @@ tasks:
232283
BUILD_DATE: '{{dateInZone "2006-01-02T15:04:05Z" (now) "UTC"}}'
233284
cmds:
234285
- go install -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -v ./cmd/thv
286+
- cmd: |
287+
if [ -f bin/go-microvm-runner ]; then
288+
gobin="$(go env GOPATH)/bin"
289+
cp bin/go-microvm-runner "$gobin/go-microvm-runner"
290+
echo "Installed go-microvm-runner to $gobin/"
291+
fi
292+
platforms: [linux, darwin]
235293
236294
build-vmcp:
237295
desc: Build the vmcp binary

cmd/thv-vm-init/entrypoint.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build linux
5+
6+
package main
7+
8+
import (
9+
"encoding/json"
10+
"fmt"
11+
"os"
12+
)
13+
14+
// DefaultEntrypointPath is where, inside the guest, the InjectEntrypoint
// rootfs hook injects the JSON file describing the image's OCI entrypoint.
const DefaultEntrypointPath = "/etc/thv-entrypoint.json"
17+
18+
// entrypointConfig is the JSON schema of the entrypoint file: the command,
// environment, and working directory that the container image originally
// declared, recorded before the init override replaced the image's default
// entrypoint.
type entrypointConfig struct {
	// Cmd is the command line to run; the first element is the executable.
	Cmd []string `json:"cmd"`
	// Env holds additional environment entries; left out of the JSON when empty.
	Env []string `json:"env,omitempty"`
	// WorkingDir is the directory to run Cmd in; left out of the JSON when empty.
	WorkingDir string `json:"working_dir,omitempty"`
}
26+
27+
// loadEntrypoint reads and parses an entrypoint config file.
28+
func loadEntrypoint(path string) (*entrypointConfig, error) {
29+
data, err := os.ReadFile(path) //nolint:gosec // path is a trusted guest-internal config path
30+
if err != nil {
31+
return nil, fmt.Errorf("reading entrypoint config: %w", err)
32+
}
33+
var cfg entrypointConfig
34+
if err := json.Unmarshal(data, &cfg); err != nil {
35+
return nil, fmt.Errorf("parsing entrypoint config: %w", err)
36+
}
37+
if len(cfg.Cmd) == 0 {
38+
return nil, fmt.Errorf("entrypoint config has empty cmd")
39+
}
40+
return &cfg, nil
41+
}

cmd/thv-vm-init/entrypoint_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build linux
5+
6+
package main
7+
8+
import (
9+
"os"
10+
"path/filepath"
11+
"testing"
12+
13+
"github.com/stretchr/testify/require"
14+
)
15+
16+
func TestLoadEntrypoint(t *testing.T) {
17+
t.Parallel()
18+
19+
tests := []struct {
20+
name string
21+
content string
22+
wantCmd []string
23+
wantErr string
24+
}{
25+
{
26+
name: "valid full config",
27+
content: `{"cmd":["/usr/bin/mcp-server","--port","8080"],"env":["FOO=bar"],"working_dir":"/app"}`,
28+
wantCmd: []string{"/usr/bin/mcp-server", "--port", "8080"},
29+
},
30+
{
31+
name: "valid minimal config",
32+
content: `{"cmd":["python3","server.py"]}`,
33+
wantCmd: []string{"python3", "server.py"},
34+
},
35+
{
36+
name: "empty cmd",
37+
content: `{"cmd":[]}`,
38+
wantErr: "entrypoint config has empty cmd",
39+
},
40+
{
41+
name: "null cmd",
42+
content: `{"cmd":null}`,
43+
wantErr: "entrypoint config has empty cmd",
44+
},
45+
{
46+
name: "invalid JSON",
47+
content: `{not valid json}`,
48+
wantErr: "parsing entrypoint config",
49+
},
50+
}
51+
52+
for _, tt := range tests {
53+
t.Run(tt.name, func(t *testing.T) {
54+
t.Parallel()
55+
dir := t.TempDir()
56+
path := filepath.Join(dir, "entrypoint.json")
57+
require.NoError(t, os.WriteFile(path, []byte(tt.content), 0o644))
58+
59+
cfg, err := loadEntrypoint(path)
60+
if tt.wantErr != "" {
61+
require.ErrorContains(t, err, tt.wantErr)
62+
return
63+
}
64+
require.NoError(t, err)
65+
require.Equal(t, tt.wantCmd, cfg.Cmd)
66+
})
67+
}
68+
}
69+
70+
func TestLoadEntrypoint_MissingFile(t *testing.T) {
71+
t.Parallel()
72+
_, err := loadEntrypoint("/nonexistent/path/entrypoint.json")
73+
require.ErrorContains(t, err, "reading entrypoint config")
74+
}

cmd/thv-vm-init/main.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build linux
5+
6+
// thv-vm-init is the PID 1 init process for ToolHive go-microvm guest VMs.
7+
// It boots the guest (mounts, DHCP, SSH), reads the original OCI entrypoint
8+
// from /etc/thv-entrypoint.json, starts the MCP server as a child process,
9+
// and forwards termination signals.
10+
package main
11+
12+
import (
13+
"context"
14+
"log/slog"
15+
"os"
16+
"os/exec"
17+
"os/signal"
18+
"syscall"
19+
"time"
20+
21+
"github.com/stacklok/go-microvm/guest/boot"
22+
"github.com/stacklok/go-microvm/guest/reaper"
23+
)
24+
25+
// shutdownTimeout is the longest gracefulHalt waits for the boot services'
// shutdown function to return before halting the VM regardless.
const shutdownTimeout = 5 * time.Second
26+
27+
func main() {
28+
logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
29+
30+
// PID 1 must reap orphaned children.
31+
stopReaper := reaper.Start(logger)
32+
defer stopReaper()
33+
34+
// Boot: essential mounts, DHCP networking, SSH server.
35+
shutdown, err := boot.Run(logger,
36+
boot.WithSSHKeysPath("/root/.ssh/authorized_keys"),
37+
boot.WithEnvFilePath("/etc/environment"),
38+
)
39+
if err != nil {
40+
logger.Error("boot failed", "error", err)
41+
halt()
42+
return
43+
}
44+
45+
// Load the original OCI entrypoint that was captured by the
46+
// InjectEntrypoint rootfs hook before WithInitOverride replaced it.
47+
ep, err := loadEntrypoint(DefaultEntrypointPath)
48+
if err != nil {
49+
logger.Error("failed to load entrypoint", "error", err)
50+
gracefulHalt(logger, shutdown)
51+
return
52+
}
53+
54+
// Start the MCP server as a child process.
55+
cmd := exec.Command(ep.Cmd[0], ep.Cmd[1:]...) //nolint:gosec // cmd comes from trusted OCI config
56+
cmd.Stdout = os.Stdout
57+
cmd.Stderr = os.Stderr
58+
cmd.Env = append(os.Environ(), ep.Env...)
59+
if ep.WorkingDir != "" {
60+
cmd.Dir = ep.WorkingDir
61+
}
62+
63+
if err := cmd.Start(); err != nil {
64+
logger.Error("failed to start MCP server", "error", err, "cmd", ep.Cmd)
65+
gracefulHalt(logger, shutdown)
66+
return
67+
}
68+
logger.Info("MCP server started", "pid", cmd.Process.Pid, "cmd", ep.Cmd)
69+
70+
// Forward SIGTERM/SIGINT to the child process.
71+
sig := make(chan os.Signal, 1)
72+
signal.Notify(sig, syscall.SIGTERM, syscall.SIGINT)
73+
go func() {
74+
for received := range sig {
75+
logger.Info("received signal, forwarding to child", "signal", received)
76+
_ = cmd.Process.Signal(received)
77+
}
78+
}()
79+
80+
// Wait for the child to exit.
81+
if err := cmd.Wait(); err != nil {
82+
if exitErr, ok := err.(*exec.ExitError); ok {
83+
logger.Info("MCP server exited", "code", exitErr.ExitCode())
84+
} else {
85+
logger.Error("error waiting for MCP server", "error", err)
86+
}
87+
}
88+
89+
gracefulHalt(logger, shutdown)
90+
}
91+
92+
// gracefulHalt shuts down the boot services with a timeout and then halts the VM.
93+
func gracefulHalt(logger *slog.Logger, shutdown func()) {
94+
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
95+
defer cancel()
96+
97+
done := make(chan struct{})
98+
go func() {
99+
shutdown()
100+
close(done)
101+
}()
102+
103+
select {
104+
case <-done:
105+
logger.Info("shutdown complete")
106+
case <-ctx.Done():
107+
logger.Warn("shutdown timed out")
108+
}
109+
110+
halt()
111+
}
112+
113+
// halt powers off the VM. As PID 1 inside a VM, Reboot with POWER_OFF
// is the clean way to stop — calling os.Exit() would cause a kernel panic.
//
// Pending filesystem writes are flushed with sync first: the power-off
// request itself does not sync, so data still buffered would otherwise be
// lost.
func halt() {
	syscall.Sync()
	// If the power-off request is refused there is nothing further this
	// process can do, so the error is deliberately dropped and the caller
	// simply returns.
	_ = syscall.Reboot(syscall.LINUX_REBOOT_CMD_POWER_OFF)
}

0 commit comments

Comments
 (0)