diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f687ae0 --- /dev/null +++ b/.env.example @@ -0,0 +1,35 @@ +# Copy to .env (gitignored) and fill in your values: +# cp .env.example .env +# +# Sourced from the repo root by the scripts under scripts/. Each +# script bails with `set GCP_PROJECT in .env` (or similar) if a +# required value is unset. + +# === GCP appliance flow (scripts/appliance-qemu-to-gcp.sh) === + +# GCP project where appliance images, VMs, and the persistent +# data disk live. The bundled service account +# (gcp-bootstrap-credentials.sh) must have roles/owner here. +# Example: my-org-appliance +GCP_PROJECT= + +# Path to the GCP service-account JSON. Created via +# scripts/gcp-bootstrap-credentials.sh on a machine with +# gcloud Owner access; copy the JSON to this path on the +# build machine and chmod 600. +# Example: ~/.config/y-cluster/gcp-appliance.key.json +GCP_KEY= + +# === Hetzner Object Storage publish (scripts/appliance-publish-hetzner.sh) === + +# Path to a file containing HCLOUD_TOKEN and Hetzner Object +# Storage credentials. Format documented in the script's header. +# Example: ~/.config/y-cluster/hetzner.env +H_S3_ENV_FILE= + +# === Hetzner Cloud Packer flow (scripts/e2e-appliance-hetzner.sh) === + +# Same file as H_S3_ENV_FILE when HCLOUD_TOKEN + S3 creds +# co-locate; separate ENV_FILE var historically. +# Example: ~/.config/y-cluster/hetzner.env +ENV_FILE= diff --git a/.gitignore b/.gitignore index 3d0a7ca..e5be3bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ /kustomize-traverse /dist/ + +# operator-local defaults sourced by the appliance scripts +.env diff --git a/scripts/appliance-build-hetzner.sh b/scripts/appliance-build-hetzner.sh new file mode 100755 index 0000000..53893b9 --- /dev/null +++ b/scripts/appliance-build-hetzner.sh @@ -0,0 +1,416 @@ +#!/usr/bin/env bash +# Build a y-cluster appliance interactively: stand up a local +# qemu cluster with the same fixtures we'll ship, give the +# operator a chance to poke at it, then on confirm run a +# Packer-built Hetzner snapshot and provision a server from +# it. Shows ssh + curl details for both stages. +# +# Why two clusters: the local one is for hands-on verification +# (kubectl / ssh / poke). The Hetzner one is the actual handoff. +# They're built from the same testdata fixtures, so verifying +# locally proves the fixture set; Packer rebuilds the snapshot +# fresh inside Hetzner. No round-trip artefact transfer between +# the two -- they're independent builds with shared inputs. +# +# Two confirmations: +# 1. "Local cluster looks good -- build Hetzner snapshot?" +# 2. "Snapshot ready -- create server from snapshot?" +# Either prompt aborts non-destructively. Aborting at (1) +# leaves the local cluster up; aborting at (2) leaves the +# Hetzner snapshot in your project for later use. 
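+# Typical invocations, for orientation (illustrative; every knob used
+# here is documented under Environment in the help text below):
+#   ./scripts/appliance-build-hetzner.sh                # interactive: both prompts
+#   ASSUME_YES=1 ./scripts/appliance-build-hetzner.sh   # unattended end-to-end
+#   SERVER_NAME=demo SERVER_LOCATION=fsn1 ./scripts/appliance-build-hetzner.sh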
+ +[ -z "$DEBUG" ] || set -x +set -eo pipefail + +YHELP='appliance-build-hetzner.sh - local verify -> confirm -> Packer snapshot -> confirm -> Hetzner server + +Usage: appliance-build-hetzner.sh + +Environment: + ENV_FILE Hetzner credentials file (set in .env or shell env; required) + HCLOUD_TOKEN Hetzner Cloud API token (sourced from ENV_FILE) + NAME Local cluster name (default: appliance-hetzner-build) + APP_HTTP_PORT Override host port for guest 80 (y-cluster default: 80) + APP_HTTPS_PORT Override host port for guest 443 (y-cluster default: 443) + APP_API_PORT Override host port for guest 6443 (y-cluster default: 6443) + APP_SSH_PORT Override host port for guest 22 (y-cluster default: 2222) + SERVER_NAME Hetzner server name (default: y-cluster-appliance) + SERVER_TYPE Hetzner server type (default: cx23) + SERVER_LOCATION Hetzner location (default: hel1) + SNAPSHOT_NAME Packer snapshot description (default: y-cluster-appliance-) + Y_CLUSTER Path to dev binary (default: ./dist/y-cluster) + CACHE_DIR Where y-cluster keeps its qcow2 (default: ~/.cache/y-cluster-qemu) + KEEP_LOCAL Set to keep the local cluster after Hetzner deploy (default: tear down) + ASSUME_YES Set to skip BOTH confirmations and proceed end-to-end + +Dependencies: + go, qemu-system-x86_64, kubectl, ssh, ssh-keygen, curl, packer, hcloud +' + +case "${1:-}" in + help) echo "$YHELP"; exit 0 ;; + --help) echo "$YHELP"; exit 0 ;; + -h) echo "$YHELP"; exit 0 ;; +esac + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +if [[ -f "$REPO_ROOT/.env" ]]; then + set -o allexport; . "$REPO_ROOT/.env"; set +o allexport +fi + +: "${ENV_FILE:?set ENV_FILE in .env or shell env}" + +NAME="${NAME:-appliance-hetzner-build}" +SERVER_NAME="${SERVER_NAME:-y-cluster-appliance}" +SERVER_TYPE="${SERVER_TYPE:-cx23}" +SERVER_LOCATION="${SERVER_LOCATION:-hel1}" +SNAPSHOT_NAME="${SNAPSHOT_NAME:-y-cluster-appliance-$(date -u +%Y%m%d-%H%M%S)}" + +Y_CLUSTER="${Y_CLUSTER:-$REPO_ROOT/dist/y-cluster}" +CACHE_DIR="${CACHE_DIR:-$HOME/.cache/y-cluster-qemu}" +PACKER_TEMPLATE="$REPO_ROOT/scripts/e2e-appliance-hetzner.pkr.hcl" + +# Keep CFG_DIR stable + outside CACHE_DIR (the cleanup glob in the +# qemu provisioner would otherwise match this directory and rm -f +# would bail, killing the script under set -e). Same convention as +# scripts/appliance-build-virtualbox.sh. +CFG_DIR="${CFG_DIR:-$HOME/.cache/y-cluster-appliance-build/$NAME}" + +# Stable location for the per-deploy ssh key so the operator can +# ssh into the Hetzner server later. Survives across script runs +# unless they delete the file or run with a fresh SERVER_NAME. +HCLOUD_KEY_DIR="$HOME/.cache/y-cluster-appliance-build/hetzner-keys" +HCLOUD_KEY="$HCLOUD_KEY_DIR/$SERVER_NAME" + +stage() { printf '\n=== %s ===\n' "$*"; } + +confirm() { + local prompt=$1 + if [[ -n "${ASSUME_YES:-}" ]]; then + echo "ASSUME_YES set; proceeding ($prompt)" + return 0 + fi + read -r -p "$prompt [y/N] " answer + case "${answer,,}" in + y|yes) return 0 ;; + *) return 1 ;; + esac +} + +cat <<'WARN' + +================================================================ +DEPRECATION WARNING + +scripts/appliance-build-hetzner.sh is on the way out. + +Hetzner Cloud has no public API for uploading custom disk +images, so this script's "build a Hetzner snapshot" stage is +a fresh build inside Hetzner via Packer -- the local-qemu +verification you do first is fixture-equivalence, NOT the same +disk that ships. That mismatches the appliance contract +(local-built disk = disk that boots elsewhere). 
+ +Replacement plan: + - scripts/appliance-qemu-to-gcp.sh (in progress) takes the + appliance contract path: provision local, export disk, + upload to GCP via `gcloud compute images import`, boot a + VM from that uploaded image. Same disk you verified + locally is the disk GCP runs. + - scripts/e2e-appliance-hetzner.sh is being repurposed once + a pkg/provision/hetzner/ provisioner exists; it will then + cover provision-on-Hetzner -> snapshot -> instantiate as + an end-to-end test of that provisioner shape. + +This script still runs. It still produces a working appliance +on Hetzner. But the artefact you ship is built fresh on +Hetzner, not transferred from your local verification. +================================================================ + +WARN +confirm "Proceed with the Hetzner Packer flow anyway?" \ + || { echo "aborted; no changes made."; exit 0; } + + +for tool in go qemu-system-x86_64 kubectl ssh ssh-keygen curl packer hcloud; do + command -v "$tool" >/dev/null \ + || { echo "missing required tool: $tool" >&2; exit 1; } +done + +if [[ ! -f "$ENV_FILE" ]]; then + echo "missing env file: $ENV_FILE (need HCLOUD_TOKEN)" >&2 + exit 1 +fi +# shellcheck disable=SC1090 +source "$ENV_FILE" +[[ -n "${HCLOUD_TOKEN:-}" ]] || { echo "HCLOUD_TOKEN not set in $ENV_FILE" >&2; exit 1; } +export HCLOUD_TOKEN + +# === Build dev binary (linux/amd64 because Packer uploads it) === +stage "building linux/amd64 dev binary -> $Y_CLUSTER" +mkdir -p "$(dirname "$Y_CLUSTER")" +( cd "$REPO_ROOT" && GOOS=linux GOARCH=amd64 go build -o "$Y_CLUSTER" ./cmd/y-cluster ) + +# === Local config === +mkdir -p "$CFG_DIR" +# YAML emission omits any port the operator didn't override, letting +# y-cluster's Go binary apply its own defaults (sshPort=2222, +# portForwards={6443:6443, 80:80, 443:443}). 
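+# With no APP_*_PORT overrides the emitted file reduces to the fixed
+# fields below (illustrative; mirrors the echo block that follows):
+#   provider: qemu
+#   name: $NAME
+#   context: $NAME
+#   memory: "4096"
+#   cpus: "2"
+#   diskSize: "40G"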
+{ + echo "provider: qemu" + echo "name: $NAME" + echo "context: $NAME" + [ -n "${APP_SSH_PORT:-}" ] && printf 'sshPort: "%s"\n' "$APP_SSH_PORT" + echo 'memory: "4096"' + echo 'cpus: "2"' + echo 'diskSize: "40G"' + if [ -n "${APP_HTTP_PORT:-}" ] || [ -n "${APP_HTTPS_PORT:-}" ] || [ -n "${APP_API_PORT:-}" ]; then + echo "portForwards:" + [ -n "${APP_API_PORT:-}" ] && printf ' - host: "%s"\n guest: "6443"\n' "$APP_API_PORT" + [ -n "${APP_HTTP_PORT:-}" ] && printf ' - host: "%s"\n guest: "80"\n' "$APP_HTTP_PORT" + [ -n "${APP_HTTPS_PORT:-}" ] && printf ' - host: "%s"\n guest: "443"\n' "$APP_HTTPS_PORT" + fi +} > "$CFG_DIR/y-cluster-provision.yaml" + +# === Stage 1: local provision + install + smoketest === +stage "tearing down any leftover $NAME cluster" +"$Y_CLUSTER" teardown -c "$CFG_DIR" || true # y-script-lint:disable=or-true # idempotent re-entry: missing cluster is not an error + +stage "provisioning local appliance ($NAME) -- k3s + Envoy Gateway" +"$Y_CLUSTER" provision -c "$CFG_DIR" + +stage "installing echo workload" +"$Y_CLUSTER" echo render \ + | kubectl --context="$NAME" apply --server-side --field-manager=customer-install -f - +kubectl --context="$NAME" -n y-cluster wait \ + --for=condition=Available deployment/echo --timeout=180s + +stage "installing VersityGW StatefulSet via yconverge" +"$Y_CLUSTER" yconverge --context="$NAME" \ + -k "$REPO_ROOT/testdata/appliance-stateful/base" + +stage "smoketest: echo + s3" +probe_local() { + local what=$1 url=$2 attempts=${3:-30} + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o /dev/null -w " $what HTTP %{http_code}\n" "$url"; then + return 0 + fi + echo " $what attempt $i/$attempts: no answer yet" + sleep 5 + done + echo "$what smoketest never succeeded; aborting" >&2 + return 1 +} +probe_local echo "http://127.0.0.1:${APP_HTTP_PORT:-80}/q/envoy/echo" +probe_local s3 "http://127.0.0.1:${APP_HTTP_PORT:-80}/s3/health" + +cat </dev/null || true # y-script-lint:disable=or-true # cleanup best-effort +fi + +# Pre-render the kustomize bases for Packer (the build VM doesn't +# have y-cluster, so it can't run yconverge; concat both module +# outputs into a single kubectl-applyable file). Same shape as +# scripts/e2e-appliance-hetzner.sh. +STATEFUL_MANIFEST=$(mktemp -t appliance-stateful.XXXXXX.yaml) +{ + kubectl kustomize "$REPO_ROOT/testdata/appliance-stateful/namespace" + echo '---' + kubectl kustomize "$REPO_ROOT/testdata/appliance-stateful/base" +} > "$STATEFUL_MANIFEST" + +LOCALSTORAGE_MANIFEST=$(mktemp -t y-cluster-localstorage.XXXXXX.yaml) +"$Y_CLUSTER" localstorage render > "$LOCALSTORAGE_MANIFEST" + +trap 'rm -f "$STATEFUL_MANIFEST" "$LOCALSTORAGE_MANIFEST"' EXIT + +stage "packer init" +packer init "$PACKER_TEMPLATE" + +stage "packer build (creates a temporary $SERVER_TYPE in $SERVER_LOCATION, snapshots, deletes)" +packer build \ + -var "snapshot_name=$SNAPSHOT_NAME" \ + -var "server_type=$SERVER_TYPE" \ + -var "location=$SERVER_LOCATION" \ + -var "y_cluster_binary=$Y_CLUSTER" \ + -var "prepare_script=$REPO_ROOT/pkg/provision/qemu/prepare_inguest.sh" \ + -var "stateful_manifest=$STATEFUL_MANIFEST" \ + -var "localstorage_manifest=$LOCALSTORAGE_MANIFEST" \ + "$PACKER_TEMPLATE" + +# Resolve snapshot id from the description we gave Packer. 
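+# For reference, an equivalent filter with jq (jq is not in this
+# script's dependency list, hence the python3 one-liner below):
+#   hcloud image list --type=snapshot --selector="purpose=y-cluster-appliance" --output=json \
+#     | jq -r --arg d "$SNAPSHOT_NAME" '.[] | select(.description == $d) | .id'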
+stage "resolving snapshot id for $SNAPSHOT_NAME" +SNAPSHOT_ID=$(hcloud image list \ + --type=snapshot \ + --selector="purpose=y-cluster-appliance" \ + --output=json \ + | python3 -c " +import json, sys +images = json.load(sys.stdin) +matches = [i for i in images if i.get('description') == '$SNAPSHOT_NAME'] +if not matches: + sys.exit('no snapshot named $SNAPSHOT_NAME found') +print(matches[0]['id']) +") +echo " snapshot id: $SNAPSHOT_ID" + +cat < +================================================================ + +EOF + +confirm "Create Hetzner server from snapshot $SNAPSHOT_ID?" \ + || { echo "aborted; snapshot $SNAPSHOT_ID preserved for later use."; exit 0; } + +# === Stage 3: create server + verify === +mkdir -p "$HCLOUD_KEY_DIR" +chmod 700 "$HCLOUD_KEY_DIR" +if [[ ! -f "$HCLOUD_KEY" ]]; then + ssh-keygen -t ed25519 -N '' -C "$SERVER_NAME-$$" -f "$HCLOUD_KEY" -q +fi +KEY_NAME="$SERVER_NAME" + +stage "tearing down any leftover server / key from a prior run" +hcloud server delete "$SERVER_NAME" 2>/dev/null || true # y-script-lint:disable=or-true # idempotent cleanup: missing server is not an error +hcloud ssh-key delete "$KEY_NAME" 2>/dev/null || true # y-script-lint:disable=or-true # idempotent cleanup: missing key is not an error + +stage "registering ssh public key as $KEY_NAME" +hcloud ssh-key create --name "$KEY_NAME" --public-key-from-file "$HCLOUD_KEY.pub" >/dev/null + +stage "creating $SERVER_NAME from snapshot $SNAPSHOT_ID" +hcloud server create \ + --name "$SERVER_NAME" \ + --type "$SERVER_TYPE" \ + --image "$SNAPSHOT_ID" \ + --location "$SERVER_LOCATION" \ + --ssh-key "$KEY_NAME" \ + >/dev/null +PUBLIC_IP=$(hcloud server ip "$SERVER_NAME") +echo " public ip: $PUBLIC_IP" + +# Wait for sshd, then probe the workload endpoints. +SSH_OPTS="-i $HCLOUD_KEY -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5" +echo " waiting for ssh on $PUBLIC_IP:22" +for _ in $(seq 1 60); do + # shellcheck disable=SC2086 + if ssh $SSH_OPTS root@"$PUBLIC_IP" 'true' 2>/dev/null; then + break + fi + sleep 5 +done + +# Cold boot from snapshot: cloud-init -> k3s.service first start -> +# envoy gateway controller + data plane -> VersityGW StatefulSet +# rebinds its PV -> klipper-lb binds :80. Generous loop. +probe_remote() { + local what=$1 url=$2 attempts=${3:-60} + local out + out=$(mktemp) + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o "$out" -w " $what HTTP %{http_code}\n" "$url"; then + echo + echo "=== $what response (head) ===" + head -25 "$out" + echo + rm -f "$out" + return 0 + fi + echo " $what attempt $i/$attempts: no answer yet" + sleep 10 + done + rm -f "$out" + return 1 +} + +stage "probing http://$PUBLIC_IP -- echo + s3" +if probe_remote echo "http://$PUBLIC_IP/q/envoy/echo" \ + && probe_remote s3 "http://$PUBLIC_IP/s3/health"; then + cat < k3s-$SERVER_NAME.yaml + KUBECONFIG=k3s-$SERVER_NAME.yaml kubectl get nodes + (k3s's apiserver isn't open to the internet by default; either + add 6443 to the Hetzner firewall, or tunnel via ssh: + ssh -L 6443:127.0.0.1:6443 -N root@$PUBLIC_IP &) + +When you're done: + hcloud server delete $SERVER_NAME + hcloud ssh-key delete $KEY_NAME + hcloud image delete $SNAPSHOT_ID # optional; snapshot is reusable +================================================================ +EOF + exit 0 +fi + +echo >&2 +echo "echo never answered. 
Server $SERVER_NAME left running for diagnosis:" >&2 +# shellcheck disable=SC2086 +ssh $SSH_OPTS root@"$PUBLIC_IP" 'systemctl is-active k3s; kubectl get pods -A 2>&1 | head -30' >&2 \ + || true # y-script-lint:disable=or-true # diagnostic best-effort +echo " ssh: ssh -i $HCLOUD_KEY root@$PUBLIC_IP" >&2 +echo " destroy: hcloud server delete $SERVER_NAME" >&2 +exit 1 diff --git a/scripts/appliance-build-virtualbox.sh b/scripts/appliance-build-virtualbox.sh new file mode 100755 index 0000000..a7ed227 --- /dev/null +++ b/scripts/appliance-build-virtualbox.sh @@ -0,0 +1,250 @@ +#!/usr/bin/env bash +# Build a y-cluster appliance and pause for hands-on testing +# before exporting a VirtualBox-friendly bundle. +# +# Same provision shape as scripts/e2e-appliance-export-import.sh: +# qemu provider, k3s + Envoy Gateway, echo workload, VersityGW +# StatefulSet (covers stateful PV path). Then it stops, prints +# kubectl + ssh access info, and waits for the operator to +# confirm before running prepare-export + export. +# +# Why interactive: the VirtualBox handoff is precious. We want +# the operator to sanity-check the live cluster before we lock +# the disk for export and (optionally) tear it down. Yes lets +# y-cluster prepare-export + export run; "no" leaves the cluster +# up for further poking (and prints the teardown command). +# +# The bundled VMDK uses subformat=monolithicSparse, which +# imports more cleanly under VirtualBox's "Use Existing Virtual +# Hard Disk File" than the streamOptimized default that ships +# for ESXi. The README inside the bundle documents both. + +[ -z "$DEBUG" ] || set -x +set -eo pipefail + +YHELP='appliance-build-virtualbox.sh - provision -> install -> pause -> export VirtualBox-friendly VMDK + +Usage: appliance-build-virtualbox.sh [bundle-dir] + +Positional: + bundle-dir Where to write the export bundle. Default: + ./dist/appliance-virtualbox/- + +Environment: + NAME Appliance name (default: appliance-virtualbox) + APP_HTTP_PORT Override host port for guest 80 (y-cluster default: 80) + APP_HTTPS_PORT Override host port for guest 443 (y-cluster default: 443) + APP_API_PORT Override host port for guest 6443 (y-cluster default: 6443) + APP_SSH_PORT Override host port for guest 22 (y-cluster default: 2222) + Y_CLUSTER Path to dev binary (default: ./dist/y-cluster) + CACHE_DIR Where y-cluster keeps its qcow2 (default: ~/.cache/y-cluster-qemu) + KEEP_CLUSTER Set to keep the cluster alive after export (default: tear it down) + SKIP_PROVISION Set to skip provision + install (resume into the prompt against + an already-running cluster of the same NAME) + ASSUME_YES Set to skip the interactive prompt and proceed to export + +Dependencies: + go, qemu-system-x86_64, qemu-img, kubectl, ssh, ssh-keygen, curl, virt-sysprep +' + +case "${1:-}" in + help) echo "$YHELP"; exit 0 ;; + --help) echo "$YHELP"; exit 0 ;; + -h) echo "$YHELP"; exit 0 ;; +esac + +NAME="${NAME:-appliance-virtualbox}" + +REPO_ROOT="$(git rev-parse --show-toplevel)" +Y_CLUSTER="${Y_CLUSTER:-$REPO_ROOT/dist/y-cluster}" +CACHE_DIR="${CACHE_DIR:-$HOME/.cache/y-cluster-qemu}" + +DEFAULT_BUNDLE="$REPO_ROOT/dist/appliance-virtualbox/$NAME-$(date -u +%Y%m%dT%H%M%SZ)" +BUNDLE_DIR="${1:-$DEFAULT_BUNDLE}" + +# CFG_DIR lives OUTSIDE $CACHE_DIR on purpose: the cleanup glob +# below ("$CACHE_DIR/$NAME-"*) would otherwise match a config +# directory whose name starts with $NAME, and rm -f bails on +# directories under set -e. Keep it stable (not mktemp -d) so +# SKIP_PROVISION can resume against an existing cluster. 
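+# e.g. (illustrative): SKIP_PROVISION=1 ./scripts/appliance-build-virtualbox.sh
+# reuses the running $NAME cluster and resumes at the interactive prompt.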
+CFG_DIR="${CFG_DIR:-$HOME/.cache/y-cluster-appliance-build/$NAME}" + +stage() { printf '\n=== %s ===\n' "$*"; } + +for tool in go qemu-system-x86_64 qemu-img kubectl ssh ssh-keygen curl virt-sysprep; do + command -v "$tool" >/dev/null \ + || { echo "missing required tool: $tool" >&2; exit 1; } +done + +# virt-sysprep needs to read /boot/vmlinuz-* (libguestfs supermin +# builds an appliance VM with the host kernel). Ubuntu installs +# kernel images 0600 root, so non-root invocations bail with an +# opaque "supermin exited with error status 1". Surface the fix. +if ! [ -r /boot/vmlinuz-"$(uname -r)" ]; then + cat >&2 < $Y_CLUSTER" +mkdir -p "$(dirname "$Y_CLUSTER")" +( cd "$REPO_ROOT" && go build -o "$Y_CLUSTER" ./cmd/y-cluster ) + +# === Config (always written; teardown + prepare-export need it) === +mkdir -p "$CFG_DIR" +# YAML emission omits any port the operator didn't override, letting +# y-cluster's Go binary apply its own defaults (sshPort=2222, +# portForwards={6443:6443, 80:80, 443:443}). +{ + echo "provider: qemu" + echo "name: $NAME" + echo "context: $NAME" + [ -n "${APP_SSH_PORT:-}" ] && printf 'sshPort: "%s"\n' "$APP_SSH_PORT" + echo 'memory: "4096"' + echo 'cpus: "2"' + echo 'diskSize: "40G"' + if [ -n "${APP_HTTP_PORT:-}" ] || [ -n "${APP_HTTPS_PORT:-}" ] || [ -n "${APP_API_PORT:-}" ]; then + echo "portForwards:" + [ -n "${APP_API_PORT:-}" ] && printf ' - host: "%s"\n guest: "6443"\n' "$APP_API_PORT" + [ -n "${APP_HTTP_PORT:-}" ] && printf ' - host: "%s"\n guest: "80"\n' "$APP_HTTP_PORT" + [ -n "${APP_HTTPS_PORT:-}" ] && printf ' - host: "%s"\n guest: "443"\n' "$APP_HTTPS_PORT" + fi +} > "$CFG_DIR/y-cluster-provision.yaml" + +if [[ -z "${SKIP_PROVISION:-}" ]]; then + stage "tearing down any leftover $NAME cluster" + "$Y_CLUSTER" teardown -c "$CFG_DIR" || true # y-script-lint:disable=or-true # idempotent re-entry: missing cluster is not an error + rm -f "$CACHE_DIR/$NAME".* "$CACHE_DIR/$NAME-"* + + stage "provisioning $NAME (k3s + Envoy Gateway)" + "$Y_CLUSTER" provision -c "$CFG_DIR" + + stage "installing echo workload" + "$Y_CLUSTER" echo render \ + | kubectl --context="$NAME" apply --server-side --field-manager=customer-install -f - + kubectl --context="$NAME" -n y-cluster wait \ + --for=condition=Available deployment/echo --timeout=180s + + stage "installing VersityGW StatefulSet via yconverge" + "$Y_CLUSTER" yconverge --context="$NAME" \ + -k "$REPO_ROOT/testdata/appliance-stateful/base" + + stage "smoketest: echo + s3" + probe() { + local what=$1 url=$2 attempts=${3:-30} + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o /dev/null -w " $what HTTP %{http_code}\n" "$url"; then + return 0 + fi + echo " $what attempt $i/$attempts: no answer yet" + sleep 5 + done + echo "$what smoketest never succeeded; aborting" >&2 + return 1 + } + probe echo "http://127.0.0.1:${APP_HTTP_PORT:-80}/q/envoy/echo" + probe s3 "http://127.0.0.1:${APP_HTTP_PORT:-80}/s3/health" +else + stage "SKIP_PROVISION set; resuming against existing $NAME cluster" +fi + +# === Interactive pause for hands-on testing === +SSH_KEY="$CACHE_DIR/$NAME-ssh" + +cat < Import Appliance wizard accepts +# only OVF / OVA, NOT raw VMDK -- so we ship OVA. The OVF +# carries the CPU/RAM/NIC hints; VirtualBox just needs port +# forwards added post-import. 
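+# The customer-side import can also be scripted instead of clicking
+# through the wizard (illustrative; VM name and host ports assumed to
+# match the README instructions printed below):
+#   VBoxManage import "$NAME.ova" --vsys 0 --vmname "$NAME"
+#   VBoxManage modifyvm "$NAME" --natpf1 "ssh,tcp,,2222,,22"
+#   VBoxManage modifyvm "$NAME" --natpf1 "http,tcp,,8080,,80"
+#   VBoxManage modifyvm "$NAME" --natpf1 "https,tcp,,8443,,443"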
+stage "exporting OVA (VirtualBox-importable) -> $BUNDLE_DIR" +mkdir -p "$(dirname "$BUNDLE_DIR")" +"$Y_CLUSTER" export \ + --context="$NAME" \ + --format=ova \ + "$BUNDLE_DIR" + +ls -la "$BUNDLE_DIR/" +echo +echo " bundled .ova members:" +tar tvf "$BUNDLE_DIR/$NAME.ova" | sed 's/^/ /' + +cat < Import Appliance -> select $BUNDLE_DIR/$NAME.ova + 2. Confirm CPU / RAM / disk on the wizard (defaults come + from the OVF: $(awk '/cpus/{print $2}' "$CFG_DIR/y-cluster-provision.yaml") vCPU, $(awk '/memory/{print $2}' "$CFG_DIR/y-cluster-provision.yaml") MiB RAM) + 3. After import: Network -> Adapter 1 -> Advanced -> Port + Forwarding, add: + ssh TCP host 2222 -> guest 22 + http TCP host 8080 -> guest 80 + https TCP host 8443 -> guest 443 + 4. Start. SSH key + access details in $BUNDLE_DIR/README.md +================================================================ +EOF + +if [[ -z "${KEEP_CLUSTER:-}" ]]; then + stage "tearing down build-side cluster (set KEEP_CLUSTER=1 to keep it)" + "$Y_CLUSTER" teardown -c "$CFG_DIR" 2>/dev/null || true # y-script-lint:disable=or-true # cleanup best-effort +fi diff --git a/scripts/appliance-publish-hetzner.sh b/scripts/appliance-publish-hetzner.sh new file mode 100755 index 0000000..397f274 --- /dev/null +++ b/scripts/appliance-publish-hetzner.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash +# Idempotently ensure a Hetzner Object Storage bucket exists, +# configured to allow public GET on individual objects but NOT +# bucket listing, then upload a single file and print its public +# URL. +# +# Use case: the operator runs scripts/appliance-build-virtualbox.sh +# to produce a VMDK bundle, then this script to publish the +# bundle (or a tarball of it) at a URL their test host can curl +# while staying anonymous. +# +# Hetzner Object Storage is S3-compatible; we shell out to the +# AWS CLI pointed at https://.your-objectstorage.com. +# If `aws` is not installed locally we run the official image +# via docker, which is universally available on dev machines. +# +# Credentials live in $H_S3_ENV_FILE (set in .env or shell env; +# typically the same file that holds HCLOUD_TOKEN). The file +# should set: +# H_S3_ACCESS_KEY= Object +# Storage -> Credentials> +# H_S3_SECRET_KEY=... +# H_S3_REGION=fsn1 # or hel1 / nbg1 +# H_S3_BUCKET=... # default bucket (script arg overrides) +# +# These are SEPARATE from HCLOUD_TOKEN: Object Storage is +# managed under the same project but the API uses dedicated +# S3 access/secret keys, not the Cloud API token. We co-locate +# them in the same env file because they share a project, not +# because they share an auth scheme. + +[ -z "$DEBUG" ] || set -x +set -eo pipefail + +YHELP='appliance-publish-hetzner.sh - upload a file to a Hetzner Object Storage bucket with public-read on objects (no listing) + +Usage: appliance-publish-hetzner.sh [object-key] + +Positional: + file Local path to upload + object-key Key to write under in the bucket (default: basename of file) + +Environment: + H_S3_ENV_FILE Path to env file with H_S3_* vars (set in .env or shell env; required) + H_S3_BUCKET Bucket name; overrides the env file. Required if not in env file. + H_S3_REGION Region; overrides the env file (fsn1, hel1, or nbg1). + AWS_CLI How to invoke aws. Default: local `aws` if on PATH, + else `docker run --rm -i public.ecr.aws/aws-cli/aws-cli`. 
+ +Examples: + # publish a fresh appliance bundle + ./scripts/appliance-publish-hetzner.sh \ + dist/appliance-virtualbox/appliance-virtualbox-*/appliance-virtualbox.vmdk + + # publish under a custom key + ./scripts/appliance-publish-hetzner.sh appliance.tar.gz releases/2026-05-01/appliance.tar.gz + +Dependencies: + curl, and one of: locally-installed `aws` (preferred) OR `docker` + (used to invoke public.ecr.aws/aws-cli/aws-cli when aws is missing) +' + +case "${1:-}" in + help) echo "$YHELP"; exit 0 ;; + --help) echo "$YHELP"; exit 0 ;; + -h) echo "$YHELP"; exit 0 ;; + "") echo "$YHELP" >&2; exit 2 ;; +esac + +INPUT="$1" +KEY_OVERRIDE="${2:-}" + +stage() { printf '\n=== %s ===\n' "$*"; } + +if [[ ! -e "$INPUT" ]]; then + echo "path not found: $INPUT" >&2 + exit 1 +fi + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +if [[ -f "$REPO_ROOT/.env" ]]; then + set -o allexport; . "$REPO_ROOT/.env"; set +o allexport +fi + +: "${H_S3_ENV_FILE:?set H_S3_ENV_FILE in .env or shell env}" +ENV_FILE="$H_S3_ENV_FILE" +if [[ -f "$ENV_FILE" ]]; then + # shellcheck disable=SC1090 + set -a; . "$ENV_FILE"; set +a +else + echo "credentials file not found: $ENV_FILE" >&2 + cat >&2 </object-storage/credentials +EOF + exit 1 +fi + +: "${H_S3_ACCESS_KEY:?H_S3_ACCESS_KEY not set in $ENV_FILE}" +: "${H_S3_SECRET_KEY:?H_S3_SECRET_KEY not set in $ENV_FILE}" +: "${H_S3_REGION:?H_S3_REGION not set in $ENV_FILE (fsn1, hel1, or nbg1)}" +: "${H_S3_BUCKET:?H_S3_BUCKET not set; pass via env or env file}" + +# Enum-check H_S3_REGION before we hit the endpoint URL. A typo +# silently lands on an invalid hostname; this catches it at +# config-load time with a message naming the valid set. +case "$H_S3_REGION" in + fsn1|hel1|nbg1) ;; + *) + echo "H_S3_REGION=$H_S3_REGION not recognised; valid: fsn1, hel1, nbg1" >&2 + exit 1 + ;; +esac + +BUCKET="$H_S3_BUCKET" +REGION="$H_S3_REGION" +ENDPOINT="https://${REGION}.your-objectstorage.com" + +# === Decide what to upload === +# Two modes: +# bundle - INPUT is a directory that looks like a y-cluster +# bundle (or a file inside one, identified by a +# sibling README.md). We tar `-C parent dirname` so +# the tarball extracts to a sibling directory in the +# customer's CWD: `tar xzf .tgz` produces +# `.//{README.md, *.vmdk, *-ssh, *-ssh.pub}`. +# single - INPUT is a regular file with no bundle context. +# Upload as-is. Key defaults to its basename. +# Bundle mode is preferred whenever a README.md sits next to +# the disk file, so the operator can pass either the directory +# or the .vmdk and get the same bundle-tarball result. +SOURCE_FILE="" +KEY="" +BUNDLE_DIR="" + +if [[ -d "$INPUT" ]]; then + BUNDLE_DIR=$(realpath "$INPUT") +elif [[ -f "$INPUT" && -f "$(dirname "$INPUT")/README.md" ]]; then + BUNDLE_DIR=$(realpath "$(dirname "$INPUT")") +fi + +if [[ -n "$BUNDLE_DIR" ]]; then + bundle_name=$(basename "$BUNDLE_DIR") + bundle_parent=$(dirname "$BUNDLE_DIR") + # Write the tarball next to the bundle dir, NOT under /tmp. + # /tmp is tmpfs on most distros (~16 GB) and a 1.5 GiB + # appliance tarball easily exhausts it; bundle_parent is on + # the operator's chosen output volume where space matches + # the bundle size. 
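+  # (Illustrative sanity check before packing: `df -h "$bundle_parent"`
+  # confirms the chosen output volume actually has the headroom.)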
+ TGZ="$bundle_parent/.${bundle_name}.$$.tgz" + trap 'rm -f "$TGZ"' EXIT + stage "packing bundle $BUNDLE_DIR -> $TGZ" + tar -czf "$TGZ" -C "$bundle_parent" "$bundle_name" + SOURCE_FILE="$TGZ" + KEY="${KEY_OVERRIDE:-${bundle_name}.tgz}" +else + SOURCE_FILE="$INPUT" + KEY="${KEY_OVERRIDE:-$(basename "$INPUT")}" +fi + +PUBLIC_URL="https://${BUCKET}.${REGION}.your-objectstorage.com/${KEY}" + +# === Pick an AWS CLI invocation === +# Prefer a local `aws` to avoid pulling a 200MB image on every +# run; fall back to docker so a fresh dev box doesn't have to +# install awscli first. +if [[ -n "${AWS_CLI:-}" ]]; then + : # operator override; trust it verbatim +elif command -v aws >/dev/null; then + AWS_CLI="aws" +elif command -v docker >/dev/null; then + # Mount /tmp because mktemp puts the policy + tarball there; + # mount $HOME so absolute paths under $HOME (typical y-cluster + # cache locations) resolve inside the container; -w $PWD + + # -v $PWD:$PWD lets relative paths the operator typed work. + AWS_CLI="docker run --rm -i \ + -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION \ + -v $HOME:$HOME -v $PWD:$PWD -v /tmp:/tmp -w $PWD \ + public.ecr.aws/aws-cli/aws-cli" +else + echo "neither 'aws' nor 'docker' found; install one or set AWS_CLI" >&2 + exit 1 +fi + +export AWS_ACCESS_KEY_ID="$H_S3_ACCESS_KEY" +export AWS_SECRET_ACCESS_KEY="$H_S3_SECRET_KEY" +export AWS_DEFAULT_REGION="$REGION" + +aws_s3api() { + # shellcheck disable=SC2086 + $AWS_CLI s3api --endpoint-url "$ENDPOINT" "$@" +} +aws_s3() { + # shellcheck disable=SC2086 + $AWS_CLI s3 --endpoint-url "$ENDPOINT" "$@" +} + +# === Ensure bucket exists === +# head-bucket exits 0 if the bucket exists and we have access, +# nonzero with stderr "Not Found" / "Forbidden" otherwise. We +# only auto-create on Not Found; Forbidden means a name clash +# in another tenant and the operator should pick a different +# bucket name. +stage "checking bucket s3://$BUCKET (endpoint: $ENDPOINT)" +head_err=$(mktemp) +trap 'rm -f "$head_err"' EXIT +if aws_s3api head-bucket --bucket "$BUCKET" 2>"$head_err"; then + echo " bucket exists" +else + if grep -qiE '404|Not Found|NoSuchBucket' "$head_err"; then + stage "creating bucket s3://$BUCKET" + # Hetzner rejects LocationConstraint=us-east-1 (the + # AWS-CLI default for create-bucket without + # --create-bucket-configuration). Hetzner-region values + # work as the LocationConstraint. + aws_s3api create-bucket \ + --bucket "$BUCKET" \ + --create-bucket-configuration "LocationConstraint=$REGION" + else + echo "head-bucket failed and not a 404:" >&2 + cat "$head_err" >&2 + exit 1 + fi +fi + +# === Apply public-read-on-objects, no-listing policy === +# This is the "anonymous can curl any individual object whose +# key they already know, but cannot enumerate the bucket" +# pattern. We allow only s3:GetObject on the +# arn:aws:s3:::BUCKET/* resource; ListBucket on the bucket +# itself is omitted, so anonymous LIST is denied. 
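+# A policy of this shape implements that pattern (sketch; the Sid is
+# illustrative):
+#   {
+#     "Version": "2012-10-17",
+#     "Statement": [{
+#       "Sid": "PublicReadObjects",
+#       "Effect": "Allow",
+#       "Principal": "*",
+#       "Action": ["s3:GetObject"],
+#       "Resource": ["arn:aws:s3:::$BUCKET/*"]
+#     }]
+#   }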
+stage "applying public-read-objects policy" +policy_file=$(mktemp) +trap 'rm -f "$head_err" "$policy_file"' EXIT +cat > "$policy_file" </dev/null || true # y-script-lint:disable=or-true # not all S3-compat backends implement put-public-access-block; policy alone is sufficient on Hetzner + +# === Upload === +stage "uploading $SOURCE_FILE -> s3://$BUCKET/$KEY" +size=$(stat -c '%s' "$SOURCE_FILE") +echo " size: $size bytes ($(numfmt --to=iec-i --suffix=B "$size" 2>/dev/null || echo "$size B"))" + +# `aws s3 cp` handles multipart for >8MB by default and prints +# a progress bar to stderr; preferred over `s3api put-object` +# for arbitrary-sized files (qcow2 / vmdk are easily >5GB). +aws_s3 cp "$SOURCE_FILE" "s3://$BUCKET/$KEY" + +# === Verify the object is anonymously reachable === +# Use a fresh curl with no creds to confirm the policy actually +# took effect; surfaces config drift (e.g. another script +# overwriting the bucket policy) at publish time, not at +# customer-download time. +stage "verifying anonymous GET" +http_code=$(curl -sI -o /dev/null -w '%{http_code}' "$PUBLIC_URL") +if [[ "$http_code" != "200" ]]; then + echo "anonymous GET returned HTTP $http_code (expected 200)" >&2 + echo "URL: $PUBLIC_URL" >&2 + exit 1 +fi +echo " anonymous GET HTTP 200" + +# === Verify the bucket is NOT anonymously listable === +list_code=$(curl -sI -o /dev/null -w '%{http_code}' "https://${BUCKET}.${REGION}.your-objectstorage.com/") +case "$list_code" in + 403) echo " anonymous LIST denied (HTTP 403): correct" ;; + 200) echo "WARNING: anonymous LIST returned HTTP 200; the bucket is enumerable. Check the policy." >&2 ;; + *) echo " anonymous LIST returned HTTP $list_code" ;; +esac + +cat <.tar.gz containing a single disk.raw, the +# shape Compute Engine custom images expect. +# 5. PROMPT 2: confirm before any GCP-side write happens. +# 6. Upload tarball to GCS (creates bucket on first run). +# 7. gcloud compute images create from the GCS object +# (direct, no Cloud Build). +# 8. gcloud compute firewall-rules create (idempotent) for +# tcp:80 + tcp:443 on tagged instances. +# 9. gcloud compute instances create from the new image, +# tagged for the firewall rule. +# 10. Wait for ssh + probe HTTP. Print connection details. +# +# Aborting at PROMPT 1 leaves the local cluster running. +# Aborting at PROMPT 2 leaves the local bundle written but +# nothing in GCP. +# +# Every gcloud invocation passes --project=$GCP_PROJECT +# explicitly. Auth is the service-account JSON pointed at by +# $GOOGLE_APPLICATION_CREDENTIALS (created by +# scripts/gcp-bootstrap-credentials.sh). 
+ +[ -z "$DEBUG" ] || set -x +set -eo pipefail + +YHELP='appliance-qemu-to-gcp.sh - local provision -> hands-on -> export -> ship to GCP + +Usage: + appliance-qemu-to-gcp.sh [--reuse-disk=true|false] build + ship to GCP + appliance-qemu-to-gcp.sh teardown [--keep-disk=true|false] + delete VM + image + GCS object; + persistent disk preserved by + default (state-preservation + is the appliance design goal) + +Build flow disk handling: + --reuse-disk=true reuse existing /data/yolean disk (preserves customer state + across redeploys; the build image seed is no-op against an + already-seeded marker -- this is the production upgrade path) + --reuse-disk=false delete + recreate the disk (fresh disk lets the build image + seed extract; use this for QA / end-to-end seed validation) + (no flag, TTY) interactive prompt with default Y (reuse) + (no flag, no TTY) error: explicit choice required for non-interactive runs + +Teardown reads GCP_PROJECT / GCP_ZONE / GCP_BUCKET / VM_NAME / +GCP_DATADIR_DISK / NAME from the same env vars as the build +flow. Custom images and GCS objects are deleted by NAME prefix +(so different NAMEs in the same project do not clobber each +other). The persistent data disk, the bucket itself, and the +firewall rule are preserved unless --keep-disk=false is set +(legacy alias: --delete-data-disk). Local cluster cleanup (if +KEEP_LOCAL was set) is separate: y-cluster teardown -c \$CFG_DIR. + +Environment: + GCP_PROJECT GCP project (set in .env or shell env; required) + GCP_REGION GCP region (default: europe-north2 -- Stockholm) + GCP_ZONE GCP zone (default: europe-north2-a) + GCP_BUCKET GCS bucket for image tarballs + (default: -appliance-images) + GCP_MACHINE_TYPE Compute Engine machine type (default: e2-standard-2 -- + 2 vCPU / 8 GiB; the 4 GiB e2-medium OOMs the full + appliance stack mid-run. The type bundles CPU + memory, + so memory is not a separate knob.) + GCP_IMAGE_FAMILY Image family tag (default: y-cluster-appliance) + GCP_DATADIR_DISK Persistent disk for /data/yolean + (default: appliance-gcp-datadir; preserved on teardown) + GCP_DATADIR_SIZE Persistent disk size (default: 10GB; only used on create) + GCP_KEY Service account JSON (set in .env or shell env; required) + NAME Local cluster name (default: appliance-gcp-build). + Used as the prefix for the deliverable directory. + KUBECTX kubectl context name (default: local). Script + bails if a context with this name already + exists in your kubeconfig -- set KUBECTX to + something else, or delete the existing one. + IMAGE_NAME Custom image name in GCE (default: -) + VM_NAME Compute Engine VM name (default: $NAME) + APP_HTTP_PORT Override host port for guest 80 (y-cluster default: 80) + APP_HTTPS_PORT Override host port for guest 443 (y-cluster default: 443) + APP_API_PORT Override host port for guest 6443 (y-cluster default: 6443) + APP_SSH_PORT Override host port for guest 22 (y-cluster default: 2222) + Y_CLUSTER Path to dev binary (default: ./dist/y-cluster) + CACHE_DIR Where y-cluster keeps its qcow2 (default: ~/.cache/y-cluster-qemu) + KEEP_LOCAL Set to keep the local cluster after upload (default: tear down) + KEEP_BUNDLE Set to keep the local export bundle (default: keep -- bundle path printed) + ASSUME_YES Skip BOTH confirmations and proceed end-to-end. + Also suppresses the optional TLS-LB prompt; set + TLS_DOMAINS alongside to opt in unattended. + APPLIANCE_SEED_CMD Optional shell cmd to run after echo install, + before PROMPT 1. 
Receives the + Y_CLUSTER_CURRENT_* env surface (build-side + ports, kubeconfig context, ssh-key path). + Typical: cd into a customer repo and apply + kustomize bases that populate /data/yolean. + Non-zero exit aborts; local cluster left up. + APPLIANCE_VERIFY_CMD Optional shell cmd to run after the GCP + deploy + optional TLS LB, before final + summary. Receives the same Y_CLUSTER_CURRENT_* + surface plus REMOTE_VM_IP / REMOTE_LB_IP / + REMOTE_DOMAINS / REMOTE_SCHEME so a remote + probe can curl --resolve through the + deployed VM. Non-zero exit aborts; VM and + LB stay up for inspection. + TLS_DOMAINS Comma-separated FQDNs for an optional regional + External HTTPS LoadBalancer with a self-signed + cert (e.g., appliance.example.com,admin.appliance.example.com). + Empty: skip the LB step. The HTTPRoutes must + already match these hostnames. + Special value "auto": derive the FQDN list from + `y-cluster gateway hostnames --csv` against the + just-provisioned cluster -- reconciled + HTTPRoute / GRPCRoute hostnames become the LB + cert SAN list, so the two can never drift. + Aborts with an error when "auto" is set but the + cluster has no non-wildcard hostnames yet. + +Dependencies: + go, qemu-system-x86_64, qemu-img, kubectl, ssh, ssh-keygen, curl, + virt-sysprep, gcloud +' + +case "${1:-}" in + help) echo "$YHELP"; exit 0 ;; + --help) echo "$YHELP"; exit 0 ;; + -h) echo "$YHELP"; exit 0 ;; +esac + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +if [[ -f "$REPO_ROOT/.env" ]]; then + set -o allexport; . "$REPO_ROOT/.env"; set +o allexport +fi + +: "${GCP_PROJECT:?set GCP_PROJECT in .env or shell env}" +: "${GCP_KEY:?set GCP_KEY in .env or shell env}" + +GCP_REGION="${GCP_REGION:-europe-north2}" +GCP_ZONE="${GCP_ZONE:-europe-north2-a}" +GCP_BUCKET="${GCP_BUCKET:-${GCP_PROJECT}-appliance-images}" +# e2-standard-2: 2 vCPU / 8 GiB. The 4 GiB e2-medium OOMs once the +# appliance is running a non-trivial workload stack; 8 GiB is the +# floor we have validated. GCE machine types bundle CPU + memory so +# a separate memory knob is not meaningful -- override the whole +# type via GCP_MACHINE_TYPE for highmem / larger shapes. +GCP_MACHINE_TYPE="${GCP_MACHINE_TYPE:-e2-standard-2}" +GCP_IMAGE_FAMILY="${GCP_IMAGE_FAMILY:-y-cluster-appliance}" +GCP_DATADIR_DISK="${GCP_DATADIR_DISK:-appliance-gcp-datadir}" +GCP_DATADIR_SIZE="${GCP_DATADIR_SIZE:-10GB}" + +NAME="${NAME:-appliance-gcp-build}" +KUBECTX="${KUBECTX:-local}" +IMAGE_NAME="${IMAGE_NAME:-${NAME}-$(date -u +%Y%m%d-%H%M%S)}" +VM_NAME="${VM_NAME:-$NAME}" + +Y_CLUSTER="${Y_CLUSTER:-$REPO_ROOT/dist/y-cluster}" +CACHE_DIR="${CACHE_DIR:-$HOME/.cache/y-cluster-qemu}" +CFG_DIR="${CFG_DIR:-$HOME/.cache/y-cluster-appliance-build/$NAME}" +# Top-level deliverable dir. Holds two per-format subdirs -- +# `gcp-tar/` (uploaded to Compute Engine here) and `ova/` +# (handed to a customer for VirtualBox / VMware Import +# Appliance). Both subdirs are byte-equivalent disk states; +# the only differences are the on-the-wire format and the +# README boot instructions. 
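+# Roughly (file names illustrative; the two subdir names are fixed):
+#   $BUNDLE_DIR/
+#     gcp-tar/   # a .tar.gz wrapping a single disk.raw (Compute Engine image shape)
+#     ova/       # an .ova for VirtualBox / VMware Import Appliance
+#   (each with its own README boot instructions)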
+BUNDLE_DIR="${BUNDLE_DIR:-$REPO_ROOT/dist/appliance/$NAME-$(date -u +%Y%m%dT%H%M%SZ)}" + +stage() { printf '\n=== %s ===\n' "$*"; } +confirm() { + local prompt=$1 + if [[ -n "${ASSUME_YES:-}" ]]; then + echo "ASSUME_YES set; proceeding ($prompt)" + return 0 + fi + read -r -p "$prompt [y/N] " answer + case "${answer,,}" in + y|yes) return 0 ;; + *) return 1 ;; + esac +} + +# prompt_yes_default is for irreversible decisions where the +# default-on-Enter is YES (state preservation, design-goal aligned) +# but the operator must EXPLICITLY pre-answer for non-interactive +# runs. Distinct from confirm() in two ways: +# - default is Y, not N +# - ASSUME_YES is NOT consulted; non-TTY callers without an +# explicit flag get a clear error instead of a silent default. +# Returns 0 for yes, 1 for no, exits 2 on no-TTY-no-flag. +prompt_yes_default() { + local prompt=$1 missing_flag_hint=$2 + if [[ ! -t 0 ]]; then + echo "non-interactive shell: $missing_flag_hint" >&2 + exit 2 + fi + read -r -p "$prompt [Y/n] " answer + case "${answer,,}" in + n|no) return 1 ;; + *) return 0 ;; + esac +} + +# current_env exports the Y_CLUSTER_CURRENT_* surface a hook +# cmd reads via printenv. Call right before invoking the cmd +# so any vars computed since the last invocation (PUBLIC_IP, +# BUNDLE_DIR, TLS-LB IP) are picked up. Vars not yet known at +# the call site are exported as empty strings (not unset) so +# a verify script can read them unconditionally. +current_env() { + export Y_CLUSTER_CURRENT_NAME="$NAME" + export Y_CLUSTER_CURRENT_KUBECTX="$KUBECTX" + export Y_CLUSTER_CURRENT_LOCAL_HTTP_PORT="${APP_HTTP_PORT:-80}" + export Y_CLUSTER_CURRENT_LOCAL_HTTPS_PORT="${APP_HTTPS_PORT:-443}" + export Y_CLUSTER_CURRENT_LOCAL_API_PORT="${APP_API_PORT:-6443}" + export Y_CLUSTER_CURRENT_LOCAL_SSH_PORT="${APP_SSH_PORT:-2222}" + export Y_CLUSTER_CURRENT_LOCAL_SSH_KEY="${CACHE_DIR:-}/${NAME}-ssh" + export Y_CLUSTER_CURRENT_BUNDLE_DIR="${BUNDLE_DIR:-}" + export Y_CLUSTER_CURRENT_REMOTE_VM_NAME="${VM_NAME:-}" + export Y_CLUSTER_CURRENT_REMOTE_VM_IP="${PUBLIC_IP:-}" + export Y_CLUSTER_CURRENT_REMOTE_DOMAINS="${TLS_DOMAINS:-}" + if [[ -n "${TLS_DOMAINS:-}" ]]; then + # do_tls_frontend keeps lb_ip in local scope; re-query + # gcloud here so the verify hook can read it. + Y_CLUSTER_CURRENT_REMOTE_LB_IP=$(gcloud compute addresses describe "${NAME}-tls-ip" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(address)' 2>/dev/null \ + || true) # y-script-lint:disable=or-true # absent IP -> empty var, hook decides + export Y_CLUSTER_CURRENT_REMOTE_LB_IP + export Y_CLUSTER_CURRENT_REMOTE_SCHEME=https + else + export Y_CLUSTER_CURRENT_REMOTE_LB_IP="" + export Y_CLUSTER_CURRENT_REMOTE_SCHEME=http + fi + export Y_CLUSTER_CURRENT_GCP_PROJECT="$GCP_PROJECT" + export Y_CLUSTER_CURRENT_GCP_ZONE="$GCP_ZONE" + export Y_CLUSTER_CURRENT_GCP_REGION="$GCP_REGION" +} + +# do_teardown deletes GCP resources owned by this script's +# NAME prefix in the configured project + zone. Reads the +# same env vars as the build flow so a teardown after a +# customised build (e.g., NAME=customer-foo) cleans up +# exactly that customer's resources without touching other +# NAMEs that share the same project. +do_teardown() { + # delete_data_disk: 0=keep (default; design-goal state preservation), + # 1=delete. --keep-disk=true|false is the explicit form; + # --delete-data-disk is the legacy alias mapped to --keep-disk=false. 
+ local delete_data_disk=0 + while [[ $# -gt 0 ]]; do + case "$1" in + --keep-disk=true) delete_data_disk=0 ;; + --keep-disk=false) delete_data_disk=1 ;; + --delete-data-disk) + echo " note: --delete-data-disk is deprecated; prefer --keep-disk=false" + delete_data_disk=1 + ;; + *) echo "unknown teardown flag: $1" >&2; exit 2 ;; + esac + shift + done + + stage "inventory in $GCP_PROJECT / $GCP_ZONE" + local vm images objects disk + vm=$(gcloud compute instances describe "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" \ + --format='value(name)' 2>/dev/null) \ + || true # y-script-lint:disable=or-true # missing VM is not an error + images=$(gcloud compute images list \ + --project="$GCP_PROJECT" \ + --no-standard-images \ + --filter="name~^${NAME}-" \ + --format='value(name)' 2>/dev/null) \ + || true # y-script-lint:disable=or-true # empty list is not an error + objects=$(gcloud storage ls "gs://$GCP_BUCKET/${NAME}-*.tar.gz" \ + --project="$GCP_PROJECT" 2>/dev/null) \ + || true # y-script-lint:disable=or-true # missing bucket / no objects is not an error + disk=$(gcloud compute disks describe "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" \ + --format='value(name)' 2>/dev/null) \ + || true # y-script-lint:disable=or-true # missing disk is not an error + + echo + echo "Will DELETE:" + [[ -n "$vm" ]] && echo " VM: $VM_NAME ($GCP_ZONE)" + if [[ -n "$images" ]]; then + echo "$images" | sed 's/^/ Image: /' + fi + if [[ -n "$objects" ]]; then + echo "$objects" | sed 's|^| GCS object: |' + fi + if [[ $delete_data_disk -eq 1 && -n "$disk" ]]; then + echo " Data disk: $GCP_DATADIR_DISK (PERSISTENT DATA WILL BE LOST)" + fi + # If a TLS LB stack exists, do_tls_teardown will pick it up. + # We don't enumerate every resource here -- the function logs + # `deleting TLS LB stack ...` when it fires. + if gcloud compute forwarding-rules describe "${NAME}-tls-fr" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null | grep -q .; then + echo " TLS LB stack: ${NAME}-tls-* (forwarding rule + 8 dependents)" + fi + echo + echo "Will PRESERVE:" + if [[ $delete_data_disk -eq 0 && -n "$disk" ]]; then + echo " Data disk: $GCP_DATADIR_DISK (--keep-disk=false to also remove)" + fi + echo " GCS bucket: gs://$GCP_BUCKET (objects matching $NAME-* deleted above)" + echo " Firewall rule: y-cluster-appliance-public (tag-based, shared)" + echo + + if [[ -z "$vm" && -z "$images" && -z "$objects" ]] \ + && { [[ $delete_data_disk -eq 0 ]] || [[ -z "$disk" ]]; }; then + echo "Nothing to delete." + exit 0 + fi + + confirm "Proceed with teardown?" 
\ + || { echo "aborted; nothing deleted."; exit 0; } + + if [[ -n "$vm" ]]; then + stage "deleting VM $VM_NAME" + gcloud compute instances delete "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + fi + if [[ -n "$images" ]]; then + stage "deleting custom images ($(echo "$images" | wc -l))" + # shellcheck disable=SC2086 + echo "$images" | xargs -r -I{} \ + gcloud compute images delete {} --project="$GCP_PROJECT" --quiet + fi + if [[ -n "$objects" ]]; then + stage "deleting GCS objects ($(echo "$objects" | wc -l))" + # shellcheck disable=SC2086 + echo "$objects" | xargs -r \ + gcloud storage rm --project="$GCP_PROJECT" + fi + if [[ $delete_data_disk -eq 1 && -n "$disk" ]]; then + stage "deleting persistent data disk $GCP_DATADIR_DISK" + gcloud compute disks delete "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + fi + + do_tls_teardown + stage "teardown complete" + + # Surface the preservation contract at the moment the operator + # is about to step away. Previously this only appeared in the + # build-flow success block, where it was less actionable. + if [[ $delete_data_disk -eq 0 && -n "$disk" ]]; then + cat </dev/null + chmod 600 "$cert_dir/privkey.pem" + + # Proxy-only subnet: required by regional EXTERNAL_MANAGED LBs, + # one ACTIVE per region+VPC. Reuse if any exists; otherwise + # create a per-build one we can clean up on teardown. + stage "ensuring proxy-only subnet in $GCP_REGION" + if gcloud compute networks subnets list \ + --project="$GCP_PROJECT" \ + --filter "region:$GCP_REGION AND purpose=REGIONAL_MANAGED_PROXY AND role=ACTIVE" \ + --format='value(name)' 2>/dev/null | grep -q .; then + echo " reusing existing proxy-only subnet" + else + gcloud compute networks subnets create "${NAME}-tls-proxy-subnet" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --network=default --range=192.168.42.0/24 \ + --purpose=REGIONAL_MANAGED_PROXY --role=ACTIVE >/dev/null + fi + + stage "reserving regional external IP ${NAME}-tls-ip" + if ! gcloud compute addresses describe "${NAME}-tls-ip" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute addresses create "${NAME}-tls-ip" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --network-tier=STANDARD >/dev/null + fi + local lb_ip + lb_ip=$(gcloud compute addresses describe "${NAME}-tls-ip" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(address)') + + stage "uploading SSL cert ${NAME}-tls-cert" + if ! gcloud compute ssl-certificates describe "${NAME}-tls-cert" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute ssl-certificates create "${NAME}-tls-cert" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --certificate="$cert_dir/fullchain.pem" \ + --private-key="$cert_dir/privkey.pem" >/dev/null + fi + + stage "creating health check ${NAME}-tls-hc" + if ! gcloud compute health-checks describe "${NAME}-tls-hc" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute health-checks create http "${NAME}-tls-hc" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --port=80 --request-path=/q/envoy/echo \ + --check-interval=10s --timeout=5s >/dev/null + fi + + stage "creating network endpoint group ${NAME}-tls-neg" + if ! 
gcloud compute network-endpoint-groups describe "${NAME}-tls-neg" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" >/dev/null 2>&1; then + gcloud compute network-endpoint-groups create "${NAME}-tls-neg" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" \ + --network-endpoint-type=GCE_VM_IP_PORT --default-port=80 >/dev/null + fi + # Always (re-)attach the VM endpoint, regardless of whether + # the NEG already existed. GCE auto-removes endpoints when + # the instance they reference is deleted, which Stage 9 does + # on every build (delete+recreate the VM for idempotency). + # If the NEG survived from a prior run but the VM was recreated, + # its endpoint reference is gone and the LB has no backend. + # Skip the add when the endpoint is already attached so re-runs + # without VM recreation stay quiet. + if ! gcloud compute network-endpoint-groups list-network-endpoints "${NAME}-tls-neg" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" \ + --format='value(instance)' 2>/dev/null | grep -Fxq "$VM_NAME"; then + gcloud compute network-endpoint-groups update "${NAME}-tls-neg" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" \ + --add-endpoint="instance=$VM_NAME,port=80" >/dev/null + fi + + stage "creating backend service ${NAME}-tls-backend" + if ! gcloud compute backend-services describe "${NAME}-tls-backend" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute backend-services create "${NAME}-tls-backend" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --load-balancing-scheme=EXTERNAL_MANAGED --protocol=HTTP \ + --health-checks="${NAME}-tls-hc" \ + --health-checks-region="$GCP_REGION" >/dev/null + gcloud compute backend-services add-backend "${NAME}-tls-backend" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --network-endpoint-group="${NAME}-tls-neg" \ + --network-endpoint-group-zone="$GCP_ZONE" \ + --balancing-mode=RATE --max-rate-per-endpoint=100 >/dev/null + fi + + stage "creating URL map ${NAME}-tls-urlmap" + if ! gcloud compute url-maps describe "${NAME}-tls-urlmap" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute url-maps create "${NAME}-tls-urlmap" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --default-service="projects/$GCP_PROJECT/regions/$GCP_REGION/backendServices/${NAME}-tls-backend" >/dev/null + fi + + stage "creating target HTTPS proxy ${NAME}-tls-proxy" + if ! gcloud compute target-https-proxies describe "${NAME}-tls-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute target-https-proxies create "${NAME}-tls-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --url-map="${NAME}-tls-urlmap" \ + --ssl-certificates="${NAME}-tls-cert" >/dev/null + fi + + stage "creating forwarding rule ${NAME}-tls-fr (:443)" + if ! gcloud compute forwarding-rules describe "${NAME}-tls-fr" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute forwarding-rules create "${NAME}-tls-fr" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --load-balancing-scheme=EXTERNAL_MANAGED --network-tier=STANDARD \ + --network=default --address="${NAME}-tls-ip" \ + --target-https-proxy="${NAME}-tls-proxy" \ + --target-https-proxy-region="$GCP_REGION" --ports=443 >/dev/null + fi + + # === HTTP -> HTTPS redirect chain === + # GCP regional EXTERNAL_MANAGED URL maps can do a default redirect + # but `gcloud compute url-maps create` has no flag for it -- we + # have to import a YAML body. 
A URL map can have either + # `defaultService` (forward) or `defaultUrlRedirect` (redirect), + # not both, hence the second URL map + second target proxy + second + # forwarding rule sharing the same reserved IP. + stage "creating redirect URL map ${NAME}-tls-redirect (HTTP -> HTTPS)" + if ! gcloud compute url-maps describe "${NAME}-tls-redirect" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute url-maps import "${NAME}-tls-redirect" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --source=- --quiet >/dev/null </dev/null 2>&1; then + gcloud compute target-http-proxies create "${NAME}-tls-http-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --url-map="${NAME}-tls-redirect" \ + --url-map-region="$GCP_REGION" >/dev/null + fi + + stage "creating forwarding rule ${NAME}-tls-fr-http (:80 -> redirect)" + if ! gcloud compute forwarding-rules describe "${NAME}-tls-fr-http" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" >/dev/null 2>&1; then + gcloud compute forwarding-rules create "${NAME}-tls-fr-http" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --load-balancing-scheme=EXTERNAL_MANAGED --network-tier=STANDARD \ + --network=default --address="${NAME}-tls-ip" \ + --target-http-proxy="${NAME}-tls-http-proxy" \ + --target-http-proxy-region="$GCP_REGION" --ports=80 >/dev/null + fi + + cat < 301 redirect to :443 (so plain http:// works + as long as the client follows redirects, e.g. curl -L) + +To test from another machine, append this single line to /etc/hosts: + + $lb_ip ${domains_csv//,/ } + +For a real cert (cert-manager / Let's Encrypt), upload a fresh PEM ++ key as ${NAME}-tls-cert-vN, then point the proxy at it via +\`gcloud compute target-https-proxies update ${NAME}-tls-proxy +--ssl-certificates=${NAME}-tls-cert-vN --region=$GCP_REGION\`. +================================================================ + +EOF +} + +# do_tls_teardown deletes everything do_tls_frontend created. +# Idempotent: missing resources are not errors. Order matters -- +# the forwarding rule has to go before the proxy/url-map/backend +# chain, and the IP after. 
+do_tls_teardown() { + local fr fr_http proxy http_proxy urlmap urlmap_redirect backend neg hc cert ip subnet + fr=$(gcloud compute forwarding-rules describe "${NAME}-tls-fr" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing fr is not an error + fr_http=$(gcloud compute forwarding-rules describe "${NAME}-tls-fr-http" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing :80 redirect fr is not an error + proxy=$(gcloud compute target-https-proxies describe "${NAME}-tls-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing proxy is not an error + http_proxy=$(gcloud compute target-http-proxies describe "${NAME}-tls-http-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing :80 redirect proxy is not an error + urlmap=$(gcloud compute url-maps describe "${NAME}-tls-urlmap" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing url-map is not an error + urlmap_redirect=$(gcloud compute url-maps describe "${NAME}-tls-redirect" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing redirect url-map is not an error + backend=$(gcloud compute backend-services describe "${NAME}-tls-backend" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing backend is not an error + neg=$(gcloud compute network-endpoint-groups describe "${NAME}-tls-neg" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing neg is not an error + hc=$(gcloud compute health-checks describe "${NAME}-tls-hc" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing hc is not an error + cert=$(gcloud compute ssl-certificates describe "${NAME}-tls-cert" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing cert is not an error + ip=$(gcloud compute addresses describe "${NAME}-tls-ip" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing ip is not an error + subnet=$(gcloud compute networks subnets describe "${NAME}-tls-proxy-subnet" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" \ + --format='value(name)' 2>/dev/null) || true # y-script-lint:disable=or-true # missing subnet is not an error + + if [[ -z "$fr$fr_http$proxy$http_proxy$urlmap$urlmap_redirect$backend$neg$hc$cert$ip$subnet" ]]; then + return + fi + + stage "deleting TLS LB stack (${NAME}-tls-*)" + # Forwarding rules first (they reference proxies) -- both :443 + # and the :80 redirect. 
+ [[ -n "$fr" ]] && gcloud compute forwarding-rules delete "${NAME}-tls-fr" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$fr_http" ]] && gcloud compute forwarding-rules delete "${NAME}-tls-fr-http" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + # Then proxies (they reference URL maps). + [[ -n "$proxy" ]] && gcloud compute target-https-proxies delete "${NAME}-tls-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$http_proxy" ]] && gcloud compute target-http-proxies delete "${NAME}-tls-http-proxy" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + # Then URL maps (the :443 backend-pointing one + the :80 redirect one). + [[ -n "$urlmap" ]] && gcloud compute url-maps delete "${NAME}-tls-urlmap" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$urlmap_redirect" ]] && gcloud compute url-maps delete "${NAME}-tls-redirect" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$backend" ]] && gcloud compute backend-services delete "${NAME}-tls-backend" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$neg" ]] && gcloud compute network-endpoint-groups delete "${NAME}-tls-neg" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + [[ -n "$hc" ]] && gcloud compute health-checks delete "${NAME}-tls-hc" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$cert" ]] && gcloud compute ssl-certificates delete "${NAME}-tls-cert" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + [[ -n "$ip" ]] && gcloud compute addresses delete "${NAME}-tls-ip" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + # Subnet last: only delete the per-build one (do_tls_frontend + # never creates a subnet that already exists, so anything named + # ${NAME}-tls-proxy-subnet was definitely ours). + [[ -n "$subnet" ]] && gcloud compute networks subnets delete "${NAME}-tls-proxy-subnet" \ + --project="$GCP_PROJECT" --region="$GCP_REGION" --quiet >/dev/null + # Force a 0 return: the [[ -n "$subnet" ]] && ... pattern above + # returns 1 when $subnet is empty (subnet was reused, not + # created by this run). Without this, set -e in the caller + # treats the function as failed and aborts before the + # "teardown complete" stage + the PRESERVED message can fire. + return 0 +} + +# Minimal pre-checks shared by build and teardown: gcloud +# binary + GCP key + activation. The build flow does +# additional tool checks below the dispatch. +command -v gcloud >/dev/null \ + || { echo "missing required tool: gcloud" >&2; exit 1; } + +if [[ ! -f "$GCP_KEY" ]]; then + echo "missing GCP key: $GCP_KEY" >&2 + echo "create it with: scripts/gcp-bootstrap-credentials.sh on a machine with gcloud Owner access" >&2 + exit 1 +fi +# Fail fast on a malformed key file (truncated, wrong export +# format, expired exports that lost their private_key) BEFORE +# gcloud activate-service-account errors with a less helpful +# message. The four fields below are the minimum GCP requires +# to authenticate as a service account. +if ! jq -e 'all(.type == "service_account"; .) 
and (.project_id // empty | length > 0) and (.client_email // empty | length > 0) and (.private_key // empty | length > 0)' "$GCP_KEY" >/dev/null 2>&1; then + echo "GCP key at $GCP_KEY is missing required fields" >&2 + echo " expected JSON with: type=service_account, project_id, client_email, private_key" >&2 + echo " regenerate via: scripts/gcp-bootstrap-credentials.sh" >&2 + exit 1 +fi +export GOOGLE_APPLICATION_CREDENTIALS="$GCP_KEY" + +# Acknowledge parallel composite uploads up front. The setting +# both turns on multi-stream uploads (which is what we want for +# 1.5+ GiB tarballs) AND silences the WARNING stanza gcloud +# would otherwise emit on every `storage cp`. Env-var form so +# we don't mutate the operator's gcloud config. +export CLOUDSDK_STORAGE_PARALLEL_COMPOSITE_UPLOAD_ENABLED=True + +stage "activating GCP service account ($GCP_KEY)" +gcloud auth activate-service-account --key-file="$GCP_KEY" --project="$GCP_PROJECT" >/dev/null + +# Subcommand dispatch. Teardown only needs gcloud + GCP_KEY, +# both verified above; doesn't need go / qemu-img / etc. so +# the build-flow tool check below stays out of its path. +if [[ "${1:-}" = "teardown" ]]; then + shift + do_teardown "$@" + exit 0 +fi + +# Build-flow arg parsing. Today: just --reuse-disk=true|false. +# Empty REUSE_DISK + interactive prompt at Stage 8.5 if the disk +# already exists; non-interactive runs without the flag error +# out at Stage 8.5 with a clear message. +REUSE_DISK="" +while [[ $# -gt 0 ]]; do + case "$1" in + --reuse-disk=true) REUSE_DISK=true ;; + --reuse-disk=false) REUSE_DISK=false ;; + *) echo "unknown build flag: $1" >&2; exit 2 ;; + esac + shift +done + +# Fail early if ASSUME_YES is set without an explicit disk +# decision: ASSUME_YES means "unattended; don't prompt me", and +# disk handling is irreversible (--reuse-disk=false destroys +# customer state). Don't let that slip through to a prompt at +# Stage 8.5 that would either hang (TTY-less) or be answered +# by a default the operator never deliberately picked. +if [[ -n "${ASSUME_YES:-}" && -z "$REUSE_DISK" ]]; then + echo "ASSUME_YES set without --reuse-disk=true|false: refusing to" >&2 + echo "guess at an irreversible decision. Pick one explicitly:" >&2 + echo " --reuse-disk=true preserve customer state (production upgrade)" >&2 + echo " --reuse-disk=false delete + recreate (QA seed validation)" >&2 + exit 2 +fi + +# Build-flow tool check (additional to gcloud above). +for tool in go qemu-system-x86_64 qemu-img kubectl ssh ssh-keygen curl virt-sysprep; do + command -v "$tool" >/dev/null \ + || { echo "missing required tool: $tool" >&2; exit 1; } +done + +# virt-sysprep needs to read /boot/vmlinuz-* (libguestfs supermin). +if ! [ -r /boot/vmlinuz-"$(uname -r)" ]; then + cat >&2 < $Y_CLUSTER" +mkdir -p "$(dirname "$Y_CLUSTER")" +( cd "$REPO_ROOT" && go build -o "$Y_CLUSTER" ./cmd/y-cluster ) + +mkdir -p "$CFG_DIR" +# YAML emission omits any port the operator didn't override, letting +# y-cluster's Go binary apply its own defaults (sshPort=2222, +# portForwards={6443:6443, 80:80, 443:443}). Set APP_*_PORT to take +# different values; otherwise the script doesn't restate y-cluster's +# defaults in two places. 
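+# Illustrative shape of the emitted file when only APP_HTTP_PORT=8080
+# is overridden (8080 is an example value; sshPort is omitted entirely
+# and the other keys take the literals echoed below):
+#   provider: qemu
+#   name: <$NAME>
+#   context: <$KUBECTX>
+#   memory: "8192"
+#   cpus: "2"
+#   diskSize: "40G"
+#   portForwards:
+#     - host: "8080"
+#       guest: "80"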
+{ + echo "provider: qemu" + echo "name: $NAME" + echo "context: $KUBECTX" + [ -n "${APP_SSH_PORT:-}" ] && printf 'sshPort: "%s"\n' "$APP_SSH_PORT" + echo 'memory: "8192"' + echo 'cpus: "2"' + echo 'diskSize: "40G"' + if [ -n "${APP_HTTP_PORT:-}" ] || [ -n "${APP_HTTPS_PORT:-}" ] || [ -n "${APP_API_PORT:-}" ]; then + echo "portForwards:" + [ -n "${APP_API_PORT:-}" ] && printf ' - host: "%s"\n guest: "6443"\n' "$APP_API_PORT" + [ -n "${APP_HTTP_PORT:-}" ] && printf ' - host: "%s"\n guest: "80"\n' "$APP_HTTP_PORT" + [ -n "${APP_HTTPS_PORT:-}" ] && printf ' - host: "%s"\n guest: "443"\n' "$APP_HTTPS_PORT" + fi +} > "$CFG_DIR/y-cluster-provision.yaml" + +stage "tearing down any leftover $NAME cluster" +"$Y_CLUSTER" teardown -c "$CFG_DIR" || true # y-script-lint:disable=or-true # idempotent re-entry: missing cluster is not an error + +# Bail-out guard: our own teardown above would have removed +# the kubectl context THIS script registered on a previous +# run. A surviving "$KUBECTX" entry means something else owns +# it (e.g., a parallel y-cluster cluster, or the operator's +# personal "local" dev cluster). We refuse to clobber. +if kubectl config get-contexts -o name 2>/dev/null | grep -Fxq "$KUBECTX"; then + echo "kubectl context '$KUBECTX' already exists and is not owned by this script." >&2 + echo " Either remove it: kubectl config delete-context $KUBECTX" >&2 + echo " Or pick a new name: KUBECTX=appliance-qa $0" >&2 + exit 1 +fi + +stage "provisioning $NAME (k3s + Envoy Gateway)" +"$Y_CLUSTER" provision -c "$CFG_DIR" + +# Echo is what creates the Gateway listener (not just the +# Envoy Gateway controller -- the actual Gateway resource that +# binds :80). Without it, any HTTPRoute the operator applies +# in the hands-on window has nothing to attach to and curl +# returns "connection refused" both locally and on the eventual +# GCP VM. Auto-install so the Gateway listener is up by default; +# operators can still delete + replace echo with their own +# workload (the Gateway listener stays, the routing changes). +stage "installing echo workload (Gateway listener + baseline route)" +"$Y_CLUSTER" echo render \ + | kubectl --context="$KUBECTX" apply --server-side --field-manager=appliance-build -f - +kubectl --context="$KUBECTX" -n y-cluster wait \ + --for=condition=Available deployment/echo --timeout=180s + +# Seed hook: caller-supplied cmd runs after echo is up but +# before PROMPT 1 / TLS_DOMAINS=auto resolution. Customer +# workloads applied here (mariadb, kafka, keycloak, HTTPRoute / +# GRPCRoute resources, etc.) populate /data/yolean for the data-seed +# extraction in prepare-export AND give TLS_DOMAINS=auto real +# hostnames to derive from. Non-zero exit aborts; local +# cluster stays up for inspection (set -e + the +# "aborted; local cluster left running" semantics of the +# upcoming PROMPT 1 path are what the operator falls back on). +if [[ -n "${APPLIANCE_SEED_CMD:-}" ]]; then + stage "applying seed (APPLIANCE_SEED_CMD)" + current_env + # set -o pipefail so a `cmd | tee log` chain in the + # caller's string doesn't swallow upstream failures. + bash -c "set -o pipefail; $APPLIANCE_SEED_CMD" +fi + +# === Stage 2: hands-on prompt === +SSH_KEY="$CACHE_DIR/$NAME-ssh" +cat <&2 + echo " Apply HTTPRoutes with .spec.hostnames first, or set TLS_DOMAINS=foo,bar to override." 
>&2 + exit 1 + } + echo " TLS_DOMAINS=$TLS_DOMAINS" +fi + +# === Stage 3: prepare-export + export gcp-tar === +# prepare-export needs the cluster RUNNING: its live phase +# clears the per-deploy yolean.se/dns-hint-ip annotation and +# snapshots reconciled Gateway state into /- +# gateway-state.json. It then stops the VM itself before the +# offline phase (libguestfs needs the disk not in use). +# Calling `y-cluster stop` here would defeat that. +stage "prepare-export ($NAME)" +"$Y_CLUSTER" prepare-export --context="$KUBECTX" + +# Dual export to per-format subdirs of the deliverable. +# Both reads come from the same prepare-export'd qcow2 so +# the disk state is byte-identical; the only differences are +# the on-the-wire packaging (tar.gz with disk.raw vs OVF + +# streamOptimized VMDK in tar) and the per-format README. +# The SSH keypair `-ssh{,.pub}` lands in both subdirs; +# the pair is identical (one keypair was generated at +# provision time, both export passes copy from the same +# source under $CACHE_DIR). +mkdir -p "$BUNDLE_DIR" + +stage "exporting Compute Engine image format -> $BUNDLE_DIR/gcp-tar" +"$Y_CLUSTER" export --context="$KUBECTX" --format=gcp-tar "$BUNDLE_DIR/gcp-tar" + +stage "exporting OVA (VirtualBox / VMware Import Appliance) -> $BUNDLE_DIR/ova" +"$Y_CLUSTER" export --context="$KUBECTX" --format=ova "$BUNDLE_DIR/ova" + +ls -lh "$BUNDLE_DIR"/*/ +TARBALL="$BUNDLE_DIR/gcp-tar/$NAME.tar.gz" + +# === Stage 4: confirm before any GCP write === +cat </dev/null || stat -c '%s' "$TARBALL") + +Next: upload to gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz, create a +GCE custom image, ensure firewall opens tcp:80 + tcp:443 on +tagged VMs, create $VM_NAME ($GCP_MACHINE_TYPE in $GCP_ZONE) +from the image. Aborting now leaves the bundle on local disk +unchanged. +================================================================ + +EOF + +confirm "Upload $TARBALL to GCS and create VM in $GCP_PROJECT?" \ + || { echo "aborted; bundle preserved at $BUNDLE_DIR."; exit 0; } + +# === Stage 5: GCS bucket (idempotent) === +stage "ensuring GCS bucket gs://$GCP_BUCKET (location $GCP_REGION)" +if ! gcloud storage buckets describe "gs://$GCP_BUCKET" --project="$GCP_PROJECT" >/dev/null 2>&1; then + gcloud storage buckets create "gs://$GCP_BUCKET" \ + --project="$GCP_PROJECT" \ + --location="$GCP_REGION" \ + --uniform-bucket-level-access +else + echo " bucket exists" +fi + +# === Stage 6: upload tarball === +stage "uploading $TARBALL -> gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" +gcloud storage cp "$TARBALL" "gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" --project="$GCP_PROJECT" + +# === Stage 7: create custom image === +stage "creating GCE custom image $IMAGE_NAME (family $GCP_IMAGE_FAMILY)" +gcloud compute images create "$IMAGE_NAME" \ + --project="$GCP_PROJECT" \ + --source-uri="gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" \ + --family="$GCP_IMAGE_FAMILY" \ + --architecture=X86_64 \ + >/dev/null + +# === Stage 8: firewall rule (idempotent) === +FIREWALL_RULE="y-cluster-appliance-public" +stage "ensuring firewall rule $FIREWALL_RULE (tcp:80,443 -> y-cluster-appliance tag)" +if ! 
gcloud compute firewall-rules describe "$FIREWALL_RULE" --project="$GCP_PROJECT" >/dev/null 2>&1; then + gcloud compute firewall-rules create "$FIREWALL_RULE" \ + --project="$GCP_PROJECT" \ + --direction=INGRESS \ + --network=default \ + --action=ALLOW \ + --rules=tcp:80,tcp:443 \ + --target-tags=y-cluster-appliance \ + --source-ranges=0.0.0.0/0 \ + >/dev/null +else + echo " rule exists" +fi + +# === Stage 8.5: ensure persistent data disk === +# Persistent disk attached to the VM and mounted at /data/yolean +# (the bundled local-path-provisioner's default storage root). +# Survives instance redeploys: tear down the VM, redeploy with a +# fresh image, the same /data/yolean comes back. Disk auto-delete +# is OFF when attaching an existing disk via --disk=name=, so +# `instances delete` won't wipe it. +# +# Reuse decision (when the disk already exists): +# --reuse-disk=true reuse silently (preserves customer state -- the +# production upgrade path; data-seed unit no-ops +# on the existing marker, so the new image's seed +# is correctly NOT applied) +# --reuse-disk=false delete + recreate (fresh disk lets the build +# image's seed extract -- the QA validation path) +# no flag, TTY interactive prompt, default Y (reuse) +# no flag, no TTY error + exit (explicit choice required for +# irreversible decisions in unattended runs) +stage "ensuring persistent data disk $GCP_DATADIR_DISK (size only used on create: $GCP_DATADIR_SIZE)" +if gcloud compute disks describe "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" >/dev/null 2>&1; then + case "$REUSE_DISK" in + true) + echo " disk exists -- reusing (--reuse-disk=true; preserves customer state)" + ;; + false) + echo " disk exists -- --reuse-disk=false: deleting and recreating" + # Detach by deleting the VM first if it's still attached. + # Stage 9 normally handles VM deletion for idempotency; we + # do it here too because gcloud compute disks delete refuses + # while the disk is mounted on a running instance. + if gcloud compute instances describe "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" >/dev/null 2>&1; then + echo " $VM_NAME exists, deleting first to release disk" + gcloud compute instances delete "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + fi + gcloud compute disks delete "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + gcloud compute disks create "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" \ + --zone="$GCP_ZONE" \ + --size="$GCP_DATADIR_SIZE" \ + --type=pd-balanced \ + >/dev/null + echo " disk recreated (fresh; will be ext4-formatted on first mount)" + ;; + *) + if prompt_yes_default \ + " Reuse existing data disk $GCP_DATADIR_DISK with its preserved state?" 
\ + "pass --reuse-disk=true (preserve state) or --reuse-disk=false (delete + reseed)"; then + echo " reusing (preserves customer state)" + else + echo " --reuse-disk=false chosen: deleting and recreating" + if gcloud compute instances describe "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" >/dev/null 2>&1; then + echo " $VM_NAME exists, deleting first to release disk" + gcloud compute instances delete "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + fi + gcloud compute disks delete "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null + gcloud compute disks create "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" \ + --zone="$GCP_ZONE" \ + --size="$GCP_DATADIR_SIZE" \ + --type=pd-balanced \ + >/dev/null + echo " disk recreated (fresh; will be ext4-formatted on first mount)" + fi + ;; + esac +else + gcloud compute disks create "$GCP_DATADIR_DISK" \ + --project="$GCP_PROJECT" \ + --zone="$GCP_ZONE" \ + --size="$GCP_DATADIR_SIZE" \ + --type=pd-balanced \ + >/dev/null + echo " disk created (fresh; will be ext4-formatted on first mount)" +fi + +# === Stage 9: create VM (delete first if exists for idempotency) === +stage "creating $VM_NAME ($GCP_MACHINE_TYPE in $GCP_ZONE) from image $IMAGE_NAME" +if gcloud compute instances describe "$VM_NAME" --project="$GCP_PROJECT" --zone="$GCP_ZONE" >/dev/null 2>&1; then + echo " $VM_NAME exists, deleting first" + gcloud compute instances delete "$VM_NAME" \ + --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null +fi +# device-name=datadir is what GCE writes after the +# `scsi-0Google_PersistentDisk_` prefix in /dev/disk/by-id/ +# inside the VM; the SSH-side mount block uses that stable path +# regardless of /dev/sd* enumeration order. +gcloud compute instances create "$VM_NAME" \ + --project="$GCP_PROJECT" \ + --zone="$GCP_ZONE" \ + --machine-type="$GCP_MACHINE_TYPE" \ + --image="$IMAGE_NAME" \ + --image-project="$GCP_PROJECT" \ + --boot-disk-size=40GB \ + --disk="name=$GCP_DATADIR_DISK,device-name=datadir,mode=rw,boot=no" \ + --tags=y-cluster-appliance \ + >/dev/null + +PUBLIC_IP=$(gcloud compute instances describe "$VM_NAME" \ + --project="$GCP_PROJECT" \ + --zone="$GCP_ZONE" \ + --format='get(networkInterfaces[0].accessConfigs[0].natIP)') +echo " public ip: $PUBLIC_IP" + +# === Stage 10: wait for ssh + probe === +# SSH_KEY (from CACHE_DIR) was used by the local cluster but is +# wiped by `y-cluster teardown` at the end of this flow. The +# bundle-dir copy is what the operator can reach the GCP VM +# with afterwards. Switch to the bundle path BEFORE teardown +# runs so subsequent prints reference the path that'll exist. 
+SSH_KEY="$BUNDLE_DIR/gcp-tar/$NAME-ssh" +SSH_OPTS="-i $SSH_KEY -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5" +echo " waiting for ssh on $PUBLIC_IP:22 (cloud-init can take 30-90s on first boot)" +ssh_up=0 +for i in $(seq 1 60); do + # shellcheck disable=SC2086 + if ssh $SSH_OPTS -p 22 ystack@"$PUBLIC_IP" 'true' 2>/dev/null; then + echo " ssh up after $i attempt(s)" + ssh_up=1 + break + fi + echo " ssh attempt $i/60: not yet" + sleep 5 +done +if [[ $ssh_up -eq 0 ]]; then + echo "ssh on $PUBLIC_IP never came up; VM left running for diagnosis" >&2 + echo " delete with: gcloud compute instances delete $VM_NAME --project=$GCP_PROJECT --zone=$GCP_ZONE" >&2 + exit 1 +fi + +# === Stage 10.5: mount the persistent disk at /data/yolean === +# The appliance disk doesn't carry GCE guest-tools and our +# prepare_inguest pinned cloud-init to NoCloud only, so we can't +# mount via cloud-init mounts/ or via google-startup-scripts. +# We SSH in and do it directly: +# - format the disk if it has no filesystem (fresh disk) +# - persist the mount via fstab UUID for subsequent reboots +# - mount now +# - restart k3s so it re-discovers /data/yolean (k3s started +# before the mount existed; existing PVs would have mapped +# to empty paths on the root FS until restart) +stage "mounting $GCP_DATADIR_DISK at /data/yolean and restarting k3s" +# shellcheck disable=SC2087 +ssh $SSH_OPTS ystack@"$PUBLIC_IP" 'sudo bash -s' <<'REMOTE' +set -eu +# /dev/disk/by-id/google- requires google-guest-agent, +# which only ships in Google's own GCE images. Our appliance is +# built from the upstream Ubuntu cloud image, so we get the +# kernel-provided SCSI udev path instead: +# /dev/disk/by-id/scsi-0Google_PersistentDisk_ +# `` is what we passed to `gcloud --disk=device-name=datadir`, +# so the path is fully deterministic. We try both shapes -- SCSI +# first (matches the current appliance) and the guest-agent shape +# as a fallback for a future build that does install the agent. +MOUNT=/data/yolean +DEVICE="" +for cand in /dev/disk/by-id/scsi-0Google_PersistentDisk_datadir /dev/disk/by-id/google-datadir; do + for _ in $(seq 1 30); do + if [ -b "$cand" ]; then + DEVICE="$cand" + break 2 + fi + sleep 1 + done +done +[ -n "$DEVICE" ] || { echo "datadir disk never appeared at any expected /dev/disk/by-id/ path" >&2; exit 1; } +echo "datadir: $DEVICE" + +# Format with the label that matches the appliance's pre-baked +# fstab entry (LABEL=y-cluster-data /data/yolean ext4 ...). +# Using a different label, or adding a UUID-based fstab line, +# would either skip the pre-bake mount or duplicate it -- we +# want the LABEL line to be the one that fires at boot. +if ! blkid "$DEVICE" >/dev/null 2>&1; then + mkfs.ext4 -F -L y-cluster-data "$DEVICE" +fi +# Idempotent label enforcement: re-running this script against a +# data disk that was formatted by a PREVIOUS version of the script +# (with a different label, e.g. `data-yolean`) would skip mkfs +# above (blkid finds an existing FS) and leave the wrong label in +# place. The appliance's pre-baked /etc/fstab matches by LABEL, so +# a wrong label means the boot-time mount silently no-ops and the +# seed gate fails. e2label is a no-op when the label is already +# correct, so applying it unconditionally is cheap insurance. +e2label "$DEVICE" y-cluster-data + +install -d -m 0755 "$MOUNT" +if ! 
mountpoint -q "$MOUNT"; then + mount "$MOUNT" +fi + +# At first boot the seed unit ran before this disk was formatted +# and mounted, so it failed the mount-required gate and k3s.service +# stayed down on its Requires=. Now that /data/yolean is a real +# mountpoint, restart the seed unit so it extracts the seed onto +# the customer's volume, then k3s. +systemctl reset-failed y-cluster-data-seed.service k3s.service +systemctl restart y-cluster-data-seed.service +systemctl restart k3s.service +REMOTE + +probe() { + local what=$1 url=$2 attempts=${3:-60} + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o /dev/null -w " $what HTTP %{http_code}\n" "$url"; then + return 0 + fi + echo " $what attempt $i/$attempts: no answer yet" + sleep 10 + done + return 1 +} + +stage "probing http://$PUBLIC_IP -- whatever you applied locally" +# We don't know the operator's routes a priori; try the +# y-cluster-shipped echo path as a baseline. If their workload +# replaced echo, this fails and the operator curls their own +# route. +# +# We deliberately do NOT enumerate Gateway routes via SSH+kubectl +# here. Post-import scripting that uses maintainer-only paths +# (SSH keys + kubectl access) is a smell: the customer's hosting +# can't replicate it, and tying the build flow to the api-server +# warm-up period creates spurious halts on transient kubectl +# failures (we hit one of those during this script's lifetime -- +# the silent abort right after `systemctl restart k3s.service`). +# Any per-route probing belongs in the consumer-side validate +# script which knows its own routes from its own kustomize. +probe echo "http://$PUBLIC_IP/q/envoy/echo" 30 || \ + echo " (no echo route -- expected if your workload replaced y-cluster echo)" + +# === Stage 11: optional external HTTPS LoadBalancer === +# Operator-driven add-on: if TLS_DOMAINS isn't set in the env, +# prompt for it (skip on empty input). With ASSUME_YES + TLS_DOMAINS +# set, runs without prompting. With ASSUME_YES alone, skip silently +# -- ASSUME_YES is for unattended e2e and we don't want to surprise +# the operator with a billing meter they didn't ask for. +if [[ -z "${TLS_DOMAINS:-}" && -z "${ASSUME_YES:-}" ]]; then + echo + echo "================================================================" + echo "Optional: external HTTPS LoadBalancer (regional, EXTERNAL_MANAGED)" + echo + echo "Sets up a regional GCP External Application Load Balancer in" + echo "front of $VM_NAME with a SELF-SIGNED cert covering the FQDNs" + echo "you specify. Useful for testing the LB+routing chain without" + echo "DNS or a real CA. Browsers will warn on the cert; tools need" + echo "--insecure / -k. Cost: ~hourly forwarding-rule + reserved IP." + echo + echo "HTTPRoutes on the cluster need spec.hostnames covering the" + echo "same FQDNs (the LB forwards Host: unchanged). Patch them" + echo "yourself before answering yes." + echo "================================================================" + read -r -p "FQDNs (comma-separated, empty to skip): " TLS_DOMAINS +fi +if [[ -n "${TLS_DOMAINS:-}" ]]; then + do_tls_frontend "$TLS_DOMAINS" +fi + +# Verify hook: caller-supplied cmd runs after the GCE VM is +# up + optional TLS LB is configured. Receives the full +# Y_CLUSTER_CURRENT_* surface including REMOTE_VM_IP, +# REMOTE_LB_IP (re-queried by current_env), REMOTE_DOMAINS, +# REMOTE_SCHEME -- enough to compose curl --resolve probes +# without /etc/hosts. Non-zero exit aborts; the VM and LB +# stay up for inspection. 
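+# A minimal probe sketch (assumes the surface composes as
+# Y_CLUSTER_CURRENT_REMOTE_DOMAINS / Y_CLUSTER_CURRENT_REMOTE_LB_IP --
+# check current_env for the exact names; -k because the LB cert is
+# self-signed):
+#   APPLIANCE_VERIFY_CMD='
+#     host=${Y_CLUSTER_CURRENT_REMOTE_DOMAINS%%,*}
+#     curl -fsSk --resolve "$host:443:$Y_CLUSTER_CURRENT_REMOTE_LB_IP" \
+#       "https://$host/q/envoy/echo"'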
+if [[ -n "${APPLIANCE_VERIFY_CMD:-}" ]]; then + stage "remote verify (APPLIANCE_VERIFY_CMD)" + current_env + # Same pipefail discipline as APPLIANCE_SEED_CMD. + bash -c "set -o pipefail; $APPLIANCE_VERIFY_CMD" +fi + +if [[ -z "${KEEP_LOCAL:-}" ]]; then + stage "tearing down local cluster (set KEEP_LOCAL=1 to keep it)" + "$Y_CLUSTER" teardown -c "$CFG_DIR" 2>/dev/null || true # y-script-lint:disable=or-true # cleanup best-effort +fi + +cat < /data/yolean (persistent) + Deliverable: $BUNDLE_DIR + ├── gcp-tar/ (uploaded to GCE, used for the + │ live $VM_NAME above) + └── ova/ (hand to a customer for VirtualBox / + VMware -- same disk state) + +Connect: + # One-time per appliance, fetch the kubeconfig onto the laptop. + # (ssh can't stream a remote file to a local path AND give you + # an interactive shell on the same connection -- stdin/stdout + # is owned by the shell -- so this fetch is its own one-shot + # ssh, separate from the interactive one below.) + ssh -i $SSH_KEY ystack@$PUBLIC_IP sudo cat /etc/rancher/k3s/k3s.yaml \\ + > ~/.kube/y-appliance-portforwarded + + # Interactive shell + apiserver tunnel; the tunnel stays up + # until you exit the shell. + ssh -i $SSH_KEY -L 6443:127.0.0.1:6443 ystack@$PUBLIC_IP + +Teardown when done: + $0 teardown + (preserves /data/yolean by default; pass --keep-disk=false to also + delete the persistent disk. Teardown's exit message lists the + disk + recommended delete command for later cleanup.) +================================================================ +EOF diff --git a/scripts/e2e-appliance-export-import.sh b/scripts/e2e-appliance-export-import.sh new file mode 100755 index 0000000..e4b5099 --- /dev/null +++ b/scripts/e2e-appliance-export-import.sh @@ -0,0 +1,319 @@ +#!/usr/bin/env bash +# Round-trip an y-cluster appliance through the export/import contract: +# build with y-cluster, install a placeholder application via kubectl, +# prepare-export, stop, copy the qcow2, then boot a SECOND qemu +# instance against the copy with no y-cluster involvement (simulating +# the customer's IT importing on their hypervisor) and verify the +# application reaches a 200 from a fresh process. +# +# Why this exists: +# The "build a per-customer appliance, ship it, customer boots it" +# pathway has never been e2e-tested. The Hetzner Packer flow proved +# snapshot+clone works on Hetzner; it doesn't tell us whether a +# qcow2 produced locally boots cleanly elsewhere. This script is +# the missing test. +# +# Conventions: +# - The application is opaque to y-cluster. We use the echo +# manifest as a placeholder, but install it via `y-cluster echo +# render | kubectl apply -f -` -- the same shape the eventual +# per-customer install will use (kubectl / kustomize / helm +# against the live cluster). y-cluster has no `echo deploy`-like +# special case here. +# - The customer-side qemu invocation is bare bash. No y-cluster +# binary, no seed image, no cloud-init reattach. Just qemu-system +# against the exported qcow2 with new port forwards. If the +# appliance can't survive that, prepare-export has the bug. +# +# Stages: +# 1. Build the dev binary into ./dist (gitignored). +# 2. Provision an appliance (k3s + Envoy Gateway only) under a +# throwaway name. +# 3. Apply the placeholder app via kubectl. +# 4. Smoketest curl on the build-side host. +# 5. y-cluster stop + prepare-export. +# 6. y-cluster export to a bundle dir (flattened qcow2 + +# keypair + README). +# 7. Boot a fresh qemu against the BUNDLED qcow2 with new +# port forwards. 
The bundle has no backing-file dependency +# on y-cluster's cloud-image cache; this proves the disk is +# genuinely portable. +# 8. Wait for ssh + curl on the imported instance. +# 9. On failure, ssh in and dump k3s state for diagnosis. + +[ -z "$DEBUG" ] || set -x +set -eo pipefail + +YHELP='e2e-appliance-export-import.sh - local round-trip provision -> kubectl install -> prepare-export -> stop -> raw-qemu boot -> verify + +Usage: e2e-appliance-export-import.sh + +Environment: + NAME Appliance name (default: appliance-export-test) + APP_HTTP_PORT Override build-side host port for guest 80 (y-cluster default: 80) + APP_HTTPS_PORT Override build-side host port for guest 443 (y-cluster default: 443) + APP_API_PORT Override build-side host port for guest 6443 (y-cluster default: 6443) + APP_SSH_PORT Override build-side host port for guest 22 (y-cluster default: 2222) + IMP_HTTP_PORT Import-side host port -> guest 80 (default: 39180) + IMP_SSH_PORT Import-side host port -> guest 22 (default: 2230) + Y_CLUSTER Path to dev binary (default: ./dist/y-cluster) + CACHE_DIR Where y-cluster keeps its qcow2 (default: ~/.cache/y-cluster-qemu) + KEEP_BUILD Set to keep the build-side cluster after success (default: tear it down) + DEBUG Set non-empty for bash trace + +Dependencies: + go, qemu-system-x86_64, kubectl, ssh, ssh-keygen, curl, virt-sysprep (libguestfs-tools) + +Exit codes: + 0 Round-trip succeeded; imported instance answered the smoketest + 1 Any stage failed; build-side cluster left up for diagnosis +' + +case "${1:-}" in + help) echo "$YHELP"; exit 0 ;; + --help) echo "$YHELP"; exit 0 ;; + -h) echo "$YHELP"; exit 0 ;; +esac + +NAME="${NAME:-appliance-export-test}" +# Import-side host ports: kept hardcoded (not env-overridable + +# defaulted) because the import-side qemu is started directly by +# this script (no y-cluster CLI involvement) and these values +# can't collide with the build-side y-cluster's defaults. +IMP_HTTP_PORT="${IMP_HTTP_PORT:-39180}" +IMP_SSH_PORT="${IMP_SSH_PORT:-2230}" + +REPO_ROOT="$(git rev-parse --show-toplevel)" +Y_CLUSTER="${Y_CLUSTER:-$REPO_ROOT/dist/y-cluster}" +CACHE_DIR="${CACHE_DIR:-$HOME/.cache/y-cluster-qemu}" +EXPORT_DIR=$(mktemp -d -p /tmp e2e-export.XXXXXX) +CFG_DIR=$(mktemp -d -p /tmp e2e-config.XXXXXX) + +stage() { printf '\n=== %s ===\n' "$*"; } + +cleanup() { + set +e + if [[ -f "$EXPORT_DIR/imported.pid" ]]; then + local imp_pid + imp_pid=$(cat "$EXPORT_DIR/imported.pid" 2>/dev/null) + if [[ -n "$imp_pid" ]] && kill -0 "$imp_pid" 2>/dev/null; then + echo "stopping imported qemu (pid $imp_pid)" + kill -TERM "$imp_pid" 2>/dev/null # y-script-lint:disable=or-true # not relevant here + sleep 2 + kill -KILL "$imp_pid" 2>/dev/null # y-script-lint:disable=or-true # may already be gone + fi + fi +} +trap cleanup EXIT + +for tool in go qemu-system-x86_64 kubectl ssh ssh-keygen curl virt-sysprep; do + command -v "$tool" >/dev/null \ + || { echo "missing required tool: $tool" >&2; exit 1; } +done + +# virt-sysprep on Ubuntu fails before it touches the qcow2 if it +# can't read /boot/vmlinuz-* (libguestfs builds a tiny appliance VM +# with the host kernel via supermin). Ubuntu installs kernel images +# 0600 root, so non-root invocations bail with an opaque +# "supermin exited with error status 1". Surface the fix here. +if ! [ -r /boot/vmlinuz-"$(uname -r)" ]; then + cat >&2 < $Y_CLUSTER" +mkdir -p "$(dirname "$Y_CLUSTER")" +( cd "$REPO_ROOT" && go build -o "$Y_CLUSTER" ./cmd/y-cluster ) + +# === 2. 
Provision the build-side appliance === +# Idempotent re-run: tear down any leftover from a prior failed run. +stage "tearing down any leftover $NAME cluster" +# We need the config in place for teardown to find the cluster, so +# write it BEFORE the teardown attempt. teardown is idempotent +# (no-op when the cluster doesn't exist) so re-entry is safe. +# YAML emission omits any port the operator didn't override, letting +# y-cluster's Go binary apply its own defaults (sshPort=2222, +# portForwards={6443:6443, 80:80, 443:443}). +{ + echo "provider: qemu" + echo "name: $NAME" + echo "context: $NAME" + [ -n "${APP_SSH_PORT:-}" ] && printf 'sshPort: "%s"\n' "$APP_SSH_PORT" + echo 'memory: "4096"' + echo 'cpus: "2"' + echo 'diskSize: "40G"' + if [ -n "${APP_HTTP_PORT:-}" ] || [ -n "${APP_HTTPS_PORT:-}" ] || [ -n "${APP_API_PORT:-}" ]; then + echo "portForwards:" + [ -n "${APP_API_PORT:-}" ] && printf ' - host: "%s"\n guest: "6443"\n' "$APP_API_PORT" + [ -n "${APP_HTTP_PORT:-}" ] && printf ' - host: "%s"\n guest: "80"\n' "$APP_HTTP_PORT" + [ -n "${APP_HTTPS_PORT:-}" ] && printf ' - host: "%s"\n guest: "443"\n' "$APP_HTTPS_PORT" + fi +} > "$CFG_DIR/y-cluster-provision.yaml" + +"$Y_CLUSTER" teardown -c "$CFG_DIR" || true # y-script-lint:disable=or-true # idempotent re-entry: missing cluster is not an error +rm -f "$CACHE_DIR/$NAME".* "$CACHE_DIR/$NAME-"* + +stage "provisioning appliance ($NAME) -- k3s + Envoy Gateway only" +"$Y_CLUSTER" provision -c "$CFG_DIR" + +# === 3. Customer install via kubectl === +# This deliberately uses kubectl, not `y-cluster echo deploy`. The +# pipeline below is exactly the shape the per-customer install path +# will take (render manifests, kubectl apply against the live +# cluster). y-cluster has no special case for the workload here. +stage "installing echo workload (Envoy Gateway + HTTPRoute)" +"$Y_CLUSTER" echo render \ + | kubectl --context="$NAME" apply --server-side --field-manager=customer-install -f - +kubectl --context="$NAME" -n y-cluster wait \ + --for=condition=Available deployment/echo --timeout=180s + +# Stateful workload: VersityGW (S3-over-posix gateway) backed by a +# 1Gi local-path PVC. Tests the persistence path that the simpler +# echo workload skips. +stage "installing VersityGW StatefulSet via yconverge" +"$Y_CLUSTER" yconverge --context="$NAME" \ + -k "$REPO_ROOT/testdata/appliance-stateful/base" + +# === 4. Build-side smoketest === +stage "build-side smoketest: echo + s3" +probe() { + local what=$1 url=$2 attempts=${3:-30} + local out + out=$(mktemp) + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o "$out" -w " $what HTTP %{http_code}\n" "$url"; then + rm -f "$out" + return 0 + fi + echo " $what attempt $i/$attempts: no answer yet" + sleep 5 + done + echo "$what smoketest never succeeded; aborting" >&2 + rm -f "$out" + return 1 +} +probe echo "http://127.0.0.1:${APP_HTTP_PORT:-80}/q/envoy/echo" +probe s3 "http://127.0.0.1:${APP_HTTP_PORT:-80}/s3/health" + +# === 5. stop + prepare-export === +# y-cluster stop owns the graceful guest shutdown (ssh +# poweroff -> wait for qemu exit -> SIGTERM/SIGKILL fallback). +# Without that, qemu's SIGTERM exits in ~200ms and the guest's +# k3s/containerd state isn't flushed, leaving zero-byte +# overlayfs snapshot files on the qcow2 and "exec format error" +# crash loops on the imported boot. +stage "stopping cluster ($NAME)" +"$Y_CLUSTER" stop --context="$NAME" + +stage "prepare-export ($NAME)" +"$Y_CLUSTER" prepare-export --context="$NAME" + +# === 6. 
y-cluster export -> bundle dir === +# Produces a flattened, self-contained qcow2 (no backing file) +# plus the keypair plus a README. EXPORT_DIR was created by +# mktemp; the export subcommand refuses to write into a +# non-empty dir, so remove that dir and re-create it after the +# export. +BUNDLE_DIR="$EXPORT_DIR/bundle" +stage "exporting bundle to $BUNDLE_DIR (--format=qcow2)" +"$Y_CLUSTER" export --context="$NAME" --format=qcow2 "$BUNDLE_DIR" +ls -la "$BUNDLE_DIR/" +echo " qemu-img info on the bundled disk:" +qemu-img info "$BUNDLE_DIR/$NAME.qcow2" | grep -E '^(file format|virtual size|disk size|backing)' | sed 's/^/ /' + +# === 7. Customer-side: raw qemu against the bundled disk === +# No y-cluster involvement here -- just qemu-system-x86_64 +# pointed at the bundled qcow2 + the bundled key. This proves +# the bundle is genuinely self-contained: any host that can run +# qemu (with the cloud image NOT present at the build path) +# would boot it. +stage "booting bundled qcow2 via raw qemu (host ports $IMP_SSH_PORT -> :22, $IMP_HTTP_PORT -> :80)" +qemu-system-x86_64 \ + -name "$NAME-imported" \ + -machine accel=kvm -cpu host \ + -smp 2 -m 4096 \ + -drive "file=$BUNDLE_DIR/$NAME.qcow2,format=qcow2,if=virtio" \ + -netdev "user,id=n0,hostfwd=tcp::$IMP_SSH_PORT-:22,hostfwd=tcp::$IMP_HTTP_PORT-:80" \ + -device virtio-net-pci,netdev=n0 \ + -serial "file:$EXPORT_DIR/console.log" \ + -display none \ + -daemonize \ + -pidfile "$EXPORT_DIR/imported.pid" +echo " imported pid: $(cat "$EXPORT_DIR/imported.pid")" + +# === 8. Wait for SSH === +SSH_OPTS="-i $BUNDLE_DIR/$NAME-ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5" +echo " waiting for ssh" +ssh_up=0 +for i in $(seq 1 60); do + # shellcheck disable=SC2086 + if ssh $SSH_OPTS -p "$IMP_SSH_PORT" ystack@127.0.0.1 'true' 2>/dev/null; then + ssh_up=1 + echo " ssh up after $i tries" + break + fi + sleep 5 +done +if [[ $ssh_up -eq 0 ]]; then + echo "imported instance ssh never came up; console log:" >&2 + tail -50 "$EXPORT_DIR/console.log" >&2 + exit 1 +fi + +# === 9. Imported smoketest === +# Both endpoints must come back: echo (stateless) proves the +# Envoy Gateway data plane is up, /s3/health (StatefulSet against +# the local-path PV that lives on the appliance disk) proves the +# stateful workload survived the export -> bundle -> raw-qemu boot. 
+stage "imported-side smoketest: echo + s3" +imp_probe() { + local what=$1 url=$2 attempts=${3:-60} + local out + out=$(mktemp) + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o "$out" -w " $what HTTP %{http_code}\n" "$url"; then + echo + echo "=== imported $what response (head) ===" + head -25 "$out" + echo + rm -f "$out" + return 0 + fi + echo " $what attempt $i/$attempts: no answer yet" + sleep 5 + done + rm -f "$out" + return 1 +} +if imp_probe echo "http://127.0.0.1:$IMP_HTTP_PORT/q/envoy/echo" \ + && imp_probe s3 "http://127.0.0.1:$IMP_HTTP_PORT/s3/health"; then + echo "=== success: round-trip works (echo + s3) ===" + echo " imported echo reachable at: http://127.0.0.1:$IMP_HTTP_PORT/q/envoy/echo" + echo " imported s3 reachable at: http://127.0.0.1:$IMP_HTTP_PORT/s3/health" + echo " imported ssh: ssh -p $IMP_SSH_PORT -i $BUNDLE_DIR/$NAME-ssh ystack@127.0.0.1" + echo " build-side cluster preserved (KEEP_BUILD=1) -- destroy with: $Y_CLUSTER teardown -c $CFG_DIR" + if [[ -z "${KEEP_BUILD:-}" ]]; then + "$Y_CLUSTER" teardown -c "$CFG_DIR" 2>/dev/null # y-script-lint:disable=or-true # success path cleanup + fi + exit 0 +fi + +# === Diagnosis on failure === +echo >&2 +echo "imported smoketest never returned 200. Diagnostics:" >&2 +# shellcheck disable=SC2086 +ssh $SSH_OPTS -p "$IMP_SSH_PORT" ystack@127.0.0.1 \ + 'echo ===nodes===; sudo k3s kubectl get nodes -o wide; + echo ===pods===; sudo k3s kubectl get pods -A; + echo ===k3s status===; systemctl is-active k3s; + echo ===listen===; sudo ss -tlnp | grep -E ":(80|443|6443)\b" + ' >&2 # y-script-lint:disable=or-true # diagnostic best-effort +echo " imported ssh: ssh -p $IMP_SSH_PORT -i $BUNDLE_DIR/$NAME-ssh ystack@127.0.0.1" >&2 +echo " console log: $EXPORT_DIR/console.log" >&2 +exit 1 diff --git a/scripts/e2e-appliance-hetzner.pkr.hcl b/scripts/e2e-appliance-hetzner.pkr.hcl new file mode 100644 index 0000000..d65181e --- /dev/null +++ b/scripts/e2e-appliance-hetzner.pkr.hcl @@ -0,0 +1,252 @@ +# Packer template that bakes a y-cluster appliance directly on +# Hetzner Cloud and saves it as a snapshot. Replaces the older +# dd-via-rescue path (qemu-img convert + zstd + dd /dev/sda from +# the rescue image) which broke at the "TCP/22 reachable, no SSH +# banner" stage we couldn't diagnose without a console. +# +# Why Packer + hcloud builder: +# - Hetzner's supported custom-image path is snapshots, not +# uploaded raw images. Building on Hetzner avoids the BIOS / +# partition table / network-driver mismatch you hit when you +# dd a qemu disk onto bare metal. +# - Packer's hcloud builder owns the lifecycle: spin a temporary +# server from a stock Ubuntu image, run provisioners over SSH, +# power off, snapshot, delete the temporary server. +# - The output (snapshot ID + name) feeds straight into +# `hcloud server create --image=` for fleet rollout. +# +# Local appliance vs Hetzner appliance: +# - Local dev still uses `y-cluster provision` against qemu and +# prepare-export when the operator wants a portable qcow2. +# - Production / customer Hetzner deploys go through this Packer +# template instead. +# - Both share the workload manifests (pkg/echo/template.yaml and +# the upstream Envoy Gateway install) by re-running the same +# `y-cluster echo deploy` invocation; only the VM lifecycle +# diverges. +# +# Required: HCLOUD_TOKEN in env, var.y_cluster_binary set to a +# linux/amd64 y-cluster build. The orchestrator script +# (e2e-appliance-hetzner.sh) supplies both. 
+ +packer { + required_plugins { + hcloud = { + source = "github.com/hetznercloud/hcloud" + version = ">= 1.6" + } + } +} + +variable "hcloud_token" { + type = string + default = "${env("HCLOUD_TOKEN")}" + sensitive = true +} + +variable "snapshot_name" { + type = string + default = "y-cluster-appliance-{{timestamp}}" +} + +# cx23 = 2 vCPU / 4 GB RAM / 40 GB disk in hel1, ~€0.006/h. +# Hetzner retired cx22 / cpx21 in EU regions during 2026; the +# x86 shared lineup is now cx*3 / cpx*2 and cax* (Ampere arm). +variable "server_type" { + type = string + default = "cx23" +} + +variable "location" { + type = string + default = "hel1" +} + +variable "base_image" { + type = string + default = "ubuntu-24.04" +} + +variable "k3s_version" { + type = string + default = "v1.35.4+k3s1" +} + +# Tracks pkg/provision/envoygateway/version.go's Version constant. +# Kept independent here so `packer build` can be run against an +# older binary if needed; the orchestrator script does NOT pin +# them together to keep that flexibility. +variable "envoy_gateway_version" { + type = string + default = "v1.7.2" +} + +variable "y_cluster_binary" { + type = string + description = "Path to a linux/amd64 y-cluster binary to upload onto the build host" +} + +variable "prepare_script" { + type = string + description = "Path to pkg/provision/qemu/prepare_inguest.sh -- the shared identity-reset script that also runs against offline qcow2 disks via virt-customize" +} + +# Stable k3s node-name baked into the appliance. The build host's +# hostname is whatever Packer assigns (e.g. packer-XXXXXXXX); the +# customer's cloned server will end up with a different hostname +# (Hetzner sets it from the server name on first boot). Pinning +# K3S_NODE_NAME decouples k3s identity from the OS hostname, so +# the cloned server's k3s recognises the node entry baked into +# the snapshot's sqlite datastore. Without this pin, every cloned +# server registers a NEW node under its own hostname while the +# build-host node lingers as orphan, and every workload pod stays +# bound to the dead node. +variable "k3s_node_name" { + type = string + default = "appliance" +} + +variable "stateful_manifest" { + type = string + description = "Path to a pre-rendered single-file YAML for the appliance-stateful workload. Packer's file provisioner doesn't recursively upload directories cleanly across all builders, so the orchestrator script `kubectl kustomize`s testdata/appliance-stateful/base into a temp file and passes the path here." +} + +variable "localstorage_manifest" { + type = string + description = "Path to a pre-rendered local-path-provisioner manifest (output of `y-cluster localstorage render`). Same shape as stateful_manifest -- a host-rendered single yaml, applied via kubectl on the build VM." +} + +source "hcloud" "appliance" { + token = var.hcloud_token + image = var.base_image + location = var.location + server_type = var.server_type + ssh_username = "root" + snapshot_name = var.snapshot_name + snapshot_labels = { + purpose = "y-cluster-appliance" + } +} + +build { + sources = ["source.hcloud.appliance"] + + # Stage the y-cluster binary on the build host. Used here for + # `y-cluster echo deploy`; left on the appliance as a no-cost + # operator-inspection convenience. + provisioner "file" { + source = var.y_cluster_binary + destination = "/usr/local/bin/y-cluster" + } + + # Stage the shared identity-reset script. Same script runs on + # the qemu prepare-export path via virt-customize. 
Single + # source of truth for what the appliance disk looks like at + # snapshot time. + provisioner "file" { + source = var.prepare_script + destination = "/usr/local/bin/y-cluster-prepare" + } + + # Stage the stateful-workload manifest (VersityGW + # StatefulSet + Service + HTTPRoute + 1Gi local-path PVC). + # The file is a single rendered YAML produced by the + # orchestrator's `kubectl kustomize`, so this is a plain + # one-file scp -- no recursive directory upload, no Packer + # SSH-communicator quirks. + provisioner "file" { + source = var.stateful_manifest + destination = "/root/appliance-stateful.yaml" + } + + # Stage the bundled local-path-provisioner manifest + # (rendered by `y-cluster localstorage render` on the host). + # Replaces k3s's disabled local-storage addon with the + # appliance-shape defaults: path /data/yolean, predictable + # PVC namespace_name pattern, Retain reclaim. + provisioner "file" { + source = var.localstorage_manifest + destination = "/root/y-cluster-localstorage.yaml" + } + + # k3s install + workload + smoketest, all running normally. + # We run k3s during the build (no INSTALL_K3S_SKIP_START) so + # the snapshot includes a fully-converged cluster: kubeconfig, + # sqlite-resident workload state, pulled container images, + # everything. The cloned server's k3s recognises the node + # entry by K3S_NODE_NAME (baked in via /etc/systemd/system/ + # k3s.service.env) and resumes -- no orphan node, no first-boot + # manifests-dir reconcile loop, faster startup. + provisioner "shell" { + inline_shebang = "/bin/bash -eux" + environment_vars = [ + "K3S_VERSION=${var.k3s_version}", + "K3S_NODE_NAME=${var.k3s_node_name}", + "ENVOY_GATEWAY_VERSION=${var.envoy_gateway_version}", + "KUBECONFIG=/etc/rancher/k3s/k3s.yaml", + ] + inline = [ + "cloud-init status --wait", + "chmod +x /usr/local/bin/y-cluster /usr/local/bin/y-cluster-prepare", + # Install + start. K3S_NODE_NAME comes from the + # environment_vars block above; the install script writes + # it into /etc/systemd/system/k3s.service.env so the + # cloned server's systemd-managed k3s reads it back on + # cold boot. + # --disable=local-storage: y-cluster ships its own + # local-path-provisioner via the y-cluster-localstorage.yaml + # applied below; k3s's bundled local-storage would otherwise + # reconcile our ConfigMap back to the upstream defaults. + "curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=\"$K3S_VERSION\" INSTALL_K3S_EXEC='--disable=traefik --disable=local-storage' sh -", + "until kubectl get nodes 2>/dev/null | grep -qE ' Ready '; do sleep 2; done", + # Bundled local-path-provisioner with appliance-shape + # defaults (path /data/yolean, predictable PVC + # namespace_name pattern, Retain reclaim). + "kubectl apply --server-side --field-manager=y-cluster -f /root/y-cluster-localstorage.yaml", + "kubectl --namespace=local-path-storage rollout status deployment/local-path-provisioner --timeout=120s", + # Envoy Gateway upstream install + the y-cluster GatewayClass. 
+ "kubectl apply --server-side -f https://github.com/envoyproxy/gateway/releases/download/$ENVOY_GATEWAY_VERSION/install.yaml", + "kubectl wait --namespace=envoy-gateway-system --for=condition=Available deployments --all --timeout=180s", + "kubectl apply --server-side -f - <<'EOF'\napiVersion: gateway.networking.k8s.io/v1\nkind: GatewayClass\nmetadata:\n name: y-cluster\nspec:\n controllerName: gateway.envoyproxy.io/gatewayclass-controller\nEOF", + # Echo workload via the standard kubectl path -- y-cluster + # has no special case for the customer's app. + "/usr/local/bin/y-cluster echo deploy --context default", + "kubectl --namespace=y-cluster wait --for=condition=Available deployment/echo --timeout=120s", + # Stateful workload: VersityGW (S3-over-posix gateway) + # backed by a local-path PVC. Brings up the persistent- + # volume code path so the snapshot includes a + # provisioned PV directory under /var/lib/rancher/k3s/ + # storage, with the StatefulSet bound to it. Cloned + # servers' k3s recognises the same node-name (appliance) + # and rebinds the same PV directory -- no orphan, no + # re-provision. + "kubectl apply --server-side --field-manager=appliance-build -f /root/appliance-stateful.yaml", + "kubectl --namespace=appliance-stateful rollout status statefulset/versitygw --timeout=180s", + # In-VM smoketest: klipper-lb (k3s's bundled LoadBalancer + # controller) binds host port 80 on the node. Probe both + # the echo path and the s3 path so a build with a broken + # PVC, missing storage class, or mis-routed HTTPRoute + # fails at build time. + "for i in $(seq 1 60); do curl -fsS http://localhost/q/envoy/echo && break; sleep 2; done", + "for i in $(seq 1 60); do curl -fsS http://localhost/s3/health && break; sleep 2; done", + ] + } + + # Identity reset via the shared script. Runs in the live VM + # against /etc/cloud/cloud.cfg.d/, /etc/netplan/, log files, + # bash history, etc. Same script the qemu prepare-export + # runs offline; one source of truth. + # + # After the script, stop k3s gracefully so the snapshot + # captures a quiesced sqlite datastore. Packer's hcloud + # builder powers the VM off and snapshots after this + # provisioner returns. + provisioner "shell" { + inline_shebang = "/bin/bash -eux" + inline = [ + "/usr/local/bin/y-cluster-prepare", + "systemctl stop k3s", + "sync", + ] + } +} diff --git a/scripts/e2e-appliance-hetzner.sh b/scripts/e2e-appliance-hetzner.sh new file mode 100755 index 0000000..9fbc137 --- /dev/null +++ b/scripts/e2e-appliance-hetzner.sh @@ -0,0 +1,306 @@ +#!/usr/bin/env bash +# e2e: build a y-cluster appliance snapshot on Hetzner Cloud via +# Packer, boot a server from it, and verify the echo HTTPRoute +# answers over the public IP. +# +# Replaces the older dd-via-rescue path (qemu-img convert + zstd + +# dd /dev/sda from rescue mode) which broke at the "TCP/22 reachable, +# no SSH banner" stage we couldn't diagnose without out-of-band +# console. Packer's hcloud builder handles base-image / partition +# layout / network drivers natively, so the path "image boots on +# Hetzner" is no longer something we have to engineer ourselves -- +# we get it for free by building on Hetzner from the start. +# +# Local appliance vs Hetzner appliance: +# - Local dev still uses `y-cluster provision` against qemu and +# prepare-export when the operator wants a portable qcow2. +# - Production Hetzner deploys go through this script, which +# produces a reusable snapshot a fleet can clone from. +# +# Stages: +# 1. 
Build a current-arch y-cluster dev binary into ./dist (the +# Packer template uploads it onto the build host). +# 2. `packer init` + `packer build` of e2e-appliance-hetzner.pkr.hcl. +# Packer creates a temporary cx23 in hel1, runs the workload +# install, snapshots, and tears the temporary server down. +# 3. Resolve the snapshot ID from `hcloud image list`. +# 4. Create a fresh server from the snapshot (idempotent: deletes +# any matching $SERVER_NAME first). +# 5. Probe http:///q/envoy/echo until it answers. +# +# Prerequisites: +# - HCLOUD_TOKEN sourced from $ENV_FILE (set in .env or shell env) +# - hcloud CLI on PATH (apt install hcloud OR snap install hcloud) +# - packer on PATH (apt install packer after adding HashiCorp's +# repo, OR download from releases.hashicorp.com) +# - go (to build the dev binary), curl, ssh-keygen +# +# On success: prints the public IP and leaves the server running so +# the operator can poke at it. Teardown is manual: +# hcloud server delete $SERVER_NAME +# hcloud image delete # optional: snapshot is reusable +# The script is idempotent on re-run -- it deletes any matching +# server/key first and starts fresh from a new snapshot. + +[ -z "$DEBUG" ] || set -x +set -eo pipefail + +YHELP='e2e-appliance-hetzner.sh - Build a y-cluster appliance snapshot on Hetzner Cloud and verify it serves traffic + +Usage: e2e-appliance-hetzner.sh + +Environment: + HCLOUD_TOKEN Hetzner Cloud API token (sourced from ENV_FILE) + ENV_FILE Path to env file with HCLOUD_TOKEN (set in .env or shell env; required) + SERVER_NAME Server name to create (default: y-cluster-appliance-test) + SERVER_TYPE Hetzner server type (default: cx23) + SERVER_LOCATION Hetzner location (default: hel1) + SNAPSHOT_NAME Snapshot description used as Packer output name + Y_CLUSTER Path to dev binary (default: ./dist/y-cluster) + DEBUG Set non-empty to enable bash trace + +Dependencies: + packer, hcloud, go, ssh, ssh-keygen, curl + +Exit codes: + 0 Success: appliance reachable on public IP + 1 Missing prereq, packer build failure, or echo never answered +' + +case "${1:-}" in + help) echo "$YHELP"; exit 0 ;; + --help) echo "$YHELP"; exit 0 ;; + -h) echo "$YHELP"; exit 0 ;; +esac + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +if [[ -f "$REPO_ROOT/.env" ]]; then + set -o allexport; . "$REPO_ROOT/.env"; set +o allexport +fi + +: "${ENV_FILE:?set ENV_FILE in .env or shell env}" + +if [[ ! -f "$ENV_FILE" ]]; then + echo "missing env file: $ENV_FILE" >&2 + echo "expected at minimum: HCLOUD_TOKEN=" >&2 + exit 1 +fi +# shellcheck disable=SC1090 +source "$ENV_FILE" +[[ -n "${HCLOUD_TOKEN:-}" ]] || { echo "HCLOUD_TOKEN not set in $ENV_FILE" >&2; exit 1; } +export HCLOUD_TOKEN + +# Tunables. Defaults match the Packer template's; override here when +# experimenting with alternate locations / instance types. 
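+# e.g. (values are illustrative; any Hetzner location / x86 server
+# type your project has quota for works):
+#   SERVER_LOCATION=fsn1 SNAPSHOT_NAME=packer-smoke scripts/e2e-appliance-hetzner.sh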
+SERVER_NAME="${SERVER_NAME:-y-cluster-appliance-test}" +SERVER_TYPE="${SERVER_TYPE:-cx23}" +SERVER_LOCATION="${SERVER_LOCATION:-hel1}" +SNAPSHOT_NAME="${SNAPSHOT_NAME:-y-cluster-appliance-$(date -u +%Y%m%d-%H%M%S)}" + +Y_CLUSTER="${Y_CLUSTER:-$REPO_ROOT/dist/y-cluster}" +PACKER_TEMPLATE="$REPO_ROOT/scripts/e2e-appliance-hetzner.pkr.hcl" + +for tool in packer hcloud go ssh ssh-keygen curl; do + command -v "$tool" >/dev/null \ + || { echo "missing required tool: $tool" >&2; exit 1; } +done + +stage() { printf '\n=== %s ===\n' "$*"; } + +confirm() { + local prompt=$1 + if [[ -n "${ASSUME_YES:-}" ]]; then + echo "ASSUME_YES set; proceeding ($prompt)" + return 0 + fi + read -r -p "$prompt [y/N] " answer + case "${answer,,}" in + y|yes) return 0 ;; + *) return 1 ;; + esac +} + +cat <<'WARN' + +================================================================ +DEPRECATION WARNING + +scripts/e2e-appliance-hetzner.sh's role is changing. + +Today this script tests the legacy "Hetzner-as-export-mode" +shape: build an appliance inside a Hetzner VM via Packer, +snapshot, boot a server from the snapshot. This shape doesn't +match the appliance contract (Hetzner Cloud has no public API +for uploading a locally-built disk). + +Replacement plan: + - This script will be REPURPOSED once a Hetzner PROVISIONER + exists in pkg/provision/hetzner/ (alongside qemu / docker / + multipass). Repurposed scope: end-to-end test of + `y-cluster provision -c hetzner.yaml` -> snapshot -> + instantiate-from-snapshot. The Packer-build half goes away; + the snapshot becomes a regular y-cluster lifecycle artefact. + - The local-build appliance contract is moving to + scripts/appliance-qemu-to-gcp.sh (Hetzner's API can't + accept a local disk; GCP's `gcloud compute images import` + can). + +This script still runs. It still passes. But its purpose is +about to flip; treat results from a green run today as +"Packer build still works" rather than "appliance contract +verified". +================================================================ + +WARN +confirm "Proceed with the legacy Packer e2e anyway?" \ + || { echo "aborted; no changes made."; exit 0; } + +# === 1. Build the dev binary the Packer template uploads === +stage "building linux/amd64 dev binary -> $Y_CLUSTER" +mkdir -p "$(dirname "$Y_CLUSTER")" +( cd "$REPO_ROOT" && GOOS=linux GOARCH=amd64 go build -o "$Y_CLUSTER" ./cmd/y-cluster ) + +# === 2. render stateful manifest + packer init + build === +# Packer's file provisioner doesn't recursively upload +# directories cleanly across all builder/communicator +# combinations (hcloud's SSH communicator scp's a directory +# argument as a single path and gets back "Is a directory"). +# Pre-render the kustomize base on the host into one yaml file +# and ship that single file to the build VM instead. Same end +# result, no scp recursion concerns. +# The fixture is split into two yconverge modules (namespace +# first, then the StatefulSet+Service+HTTPRoute) so the local +# convergence path can express the dep with a cue import. The +# Hetzner Packer flow doesn't run yconverge inside the build +# VM (would need the y-cluster binary on the VM) -- it stays +# kubectl-apply, but we render BOTH bases and concat. kubectl +# applies a Namespace ahead of namespaced resources in the +# same -f input, so a single concat'd file converges in the +# right order. 
+STATEFUL_MANIFEST=$(mktemp -t appliance-stateful.XXXXXX.yaml) +{ + kubectl kustomize "$REPO_ROOT/testdata/appliance-stateful/namespace" + echo '---' + kubectl kustomize "$REPO_ROOT/testdata/appliance-stateful/base" +} > "$STATEFUL_MANIFEST" + +# y-cluster's bundled local-path-provisioner manifest (replaces +# k3s's disabled local-storage). Rendered with the same defaults +# the Go-side provisioners install so an appliance built via +# Hetzner Packer ends up indistinguishable from one built locally. +LOCALSTORAGE_MANIFEST=$(mktemp -t y-cluster-localstorage.XXXXXX.yaml) +"$Y_CLUSTER" localstorage render > "$LOCALSTORAGE_MANIFEST" + +stage "packer init" +packer init "$PACKER_TEMPLATE" + +stage "packer build (creates a temporary $SERVER_TYPE in $SERVER_LOCATION, snapshots, deletes)" +packer build \ + -var "snapshot_name=$SNAPSHOT_NAME" \ + -var "server_type=$SERVER_TYPE" \ + -var "location=$SERVER_LOCATION" \ + -var "y_cluster_binary=$Y_CLUSTER" \ + -var "prepare_script=$REPO_ROOT/pkg/provision/qemu/prepare_inguest.sh" \ + -var "stateful_manifest=$STATEFUL_MANIFEST" \ + -var "localstorage_manifest=$LOCALSTORAGE_MANIFEST" \ + "$PACKER_TEMPLATE" + +# === 3. Resolve snapshot ID === +# Packer's hcloud builder prints the snapshot ID at the end of build +# but doesn't expose it in a stable machine-readable way without a +# manifest post-processor. hcloud image list is the simpler path. +stage "resolving snapshot id for $SNAPSHOT_NAME" +SNAPSHOT_ID=$(hcloud image list \ + --type=snapshot \ + --selector="purpose=y-cluster-appliance" \ + --output=json \ + | python3 -c " +import json, sys +images = json.load(sys.stdin) +matches = [i for i in images if i.get('description') == '$SNAPSHOT_NAME'] +if not matches: + sys.exit('no snapshot named $SNAPSHOT_NAME found') +print(matches[0]['id']) +") +echo " snapshot id: $SNAPSHOT_ID" + +# === 4. Create a fresh ssh keypair + server from the snapshot === +KEY_DIR=$(mktemp -d) +trap 'rm -rf "$KEY_DIR" "$STATEFUL_MANIFEST" "$LOCALSTORAGE_MANIFEST"' EXIT +ssh-keygen -t ed25519 -N '' -C "$SERVER_NAME-$$" -f "$KEY_DIR/id" -q +KEY_NAME="$SERVER_NAME" + +stage "tearing down any leftover server / key from a prior run" +hcloud server delete "$SERVER_NAME" 2>/dev/null || true # y-script-lint:disable=or-true # idempotent cleanup: missing server is not an error +hcloud ssh-key delete "$KEY_NAME" 2>/dev/null || true # y-script-lint:disable=or-true # idempotent cleanup: missing key is not an error + +stage "registering ssh public key as $KEY_NAME" +hcloud ssh-key create --name "$KEY_NAME" --public-key-from-file "$KEY_DIR/id.pub" >/dev/null + +stage "creating $SERVER_NAME from snapshot $SNAPSHOT_ID" +hcloud server create \ + --name "$SERVER_NAME" \ + --type "$SERVER_TYPE" \ + --image "$SNAPSHOT_ID" \ + --location "$SERVER_LOCATION" \ + --ssh-key "$KEY_NAME" \ + >/dev/null +PUBLIC_IP=$(hcloud server ip "$SERVER_NAME") +echo " public ip: $PUBLIC_IP" + +# === 5. Wait for sshd, then probe the echo HTTPRoute === +SSH_OPTS="-i $KEY_DIR/id -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5" +echo " waiting for ssh on $PUBLIC_IP:22" +for _ in $(seq 1 60); do + # shellcheck disable=SC2086 + if ssh $SSH_OPTS root@"$PUBLIC_IP" 'true' 2>/dev/null; then + break + fi + sleep 5 +done + +# Cold boot from snapshot: cloud-init runs (~30s), k3s.service starts +# for the first time, the envoy gateway controller comes up, the +# envoy proxy data plane comes up, the VersityGW StatefulSet +# rebinds its PV, klipper-lb binds :80. 
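+# (If you want to watch that chain converge by hand while the probes
+# below run, the same ssh invocation the failure path uses works here
+# too -- e.g. ssh $SSH_OPTS root@$PUBLIC_IP 'kubectl get pods -A' --
+# purely optional; the probe loop is the authoritative check.)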
+# The probe loop is long enough to cover the whole chain on a fresh cx23.
+PROBE_ATTEMPTS=60
+probe() {
+  local what=$1 url=$2 attempts=${3:-$PROBE_ATTEMPTS}
+  local out
+  out=$(mktemp)
+  for i in $(seq 1 "$attempts"); do
+    if curl -fsS --max-time 8 -o "$out" -w "  $what HTTP %{http_code}\n" "$url"; then
+      echo
+      echo "=== $what response (head) ==="
+      head -25 "$out"
+      echo
+      rm -f "$out"
+      return 0
+    fi
+    echo "  $what attempt $i/$attempts: no answer yet"
+    sleep 10
+  done
+  rm -f "$out"
+  return 1
+}
+
+stage "probing http://$PUBLIC_IP -- echo + s3"
+if probe echo "http://$PUBLIC_IP/q/envoy/echo" \
+  && probe s3 "http://$PUBLIC_IP/s3/health"; then
+  echo "=== success: cloned server serves echo + s3 ==="
+  echo "  echo:    http://$PUBLIC_IP/q/envoy/echo"
+  echo "  s3:      http://$PUBLIC_IP/s3/health"
+  echo "  ssh:     ssh -i $KEY_DIR/id root@$PUBLIC_IP"
+  echo "  destroy: hcloud server delete $SERVER_NAME"
+  echo "  snapshot ($SNAPSHOT_ID) preserved -- reuse with: hcloud server create --image=$SNAPSHOT_ID ..."
+  exit 0
+fi
+
+echo >&2
+echo "echo/s3 never answered within $((PROBE_ATTEMPTS * 10))s. server still up for diagnosis:" >&2
+# shellcheck disable=SC2086
+ssh $SSH_OPTS root@"$PUBLIC_IP" 'systemctl is-active k3s; kubectl get pods -A 2>&1 | head -30' >&2 \
+  || true # y-script-lint:disable=or-true # diagnostic best-effort -- main failure already exits 1
+echo "  ssh:     ssh -i $KEY_DIR/id root@$PUBLIC_IP" >&2
+echo "  destroy: hcloud server delete $SERVER_NAME" >&2
+exit 1
diff --git a/scripts/e2e-appliance-qemu-to-gcp.sh b/scripts/e2e-appliance-qemu-to-gcp.sh
new file mode 100755
index 0000000..073d933
--- /dev/null
+++ b/scripts/e2e-appliance-qemu-to-gcp.sh
@@ -0,0 +1,392 @@
+#!/usr/bin/env bash
+# e2e: complete qemu-to-GCP appliance workflow, non-interactive.
+#
+# This is the canonical SRE example for the appliance contract:
+# the disk we verify locally with qemu IS the disk that boots in
+# Google Compute Engine. No re-build on the cloud side; the GCS
+# tarball is exactly what `y-cluster export --format=gcp-tar`
+# produced from the local provision.
+#
+# The workflow this script documents -- in order -- is what an
+# SRE follows by hand when they want to ship a customer
+# appliance to GCP:
+#
+# 1. Bootstrap a GCP service account in the QA project (one
+#    time per project; output is a JSON key the rest of the
+#    flow consumes via GOOGLE_APPLICATION_CREDENTIALS).
+#      scripts/gcp-bootstrap-credentials.sh
+#
+# 2. Provision a y-cluster appliance locally on qemu. This
+#    gives the same k3s + Envoy Gateway + bundled local-path
+#    stack the customer will run.
+#      y-cluster provision -c <config-dir>
+#
+# 3. Install the customer's workload(s). The e2e here uses
+#    the y-cluster echo workload + the appliance-stateful
+#    VersityGW StatefulSet as stand-ins; in real customer
+#    flows this is whatever kubectl apply / yconverge / helm
+#    the customer specifies. The Hetzner Object Storage
+#    tutorial uses VersityGW; the principle is the same.
+#      y-cluster echo render | kubectl apply -f -
+#      y-cluster yconverge -k testdata/appliance-stateful/base
+#
+# 4. Smoketest from the host. Anything that's reachable on
+#    :80 of the local qemu's port-forward is reachable on
+#    :80 of the eventual GCE VM.
+#      curl http://127.0.0.1:80/q/envoy/echo
+#
+# 5. Stop the cluster cleanly so the qcow2 is quiesced. The
+#    graceful-stop logic flushes containerd snapshot state.
+#      y-cluster stop --context=$NAME
+#
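+#    (Extra belt-and-braces if you want it: once the VM is stopped,
+#     `qemu-img check` against the cluster's qcow2 -- it lives somewhere
+#     under CACHE_DIR, the exact filename is y-cluster's business --
+#     should report no errors. Illustrative only; the script itself
+#     just trusts the graceful stop.)
+#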
+# 6. prepare-export: virt-customize-driven identity reset
+#    (machine-id retained, ssh host keys retained, cloud-init
+#    cleaned, netplan generic-NIC match installed,
+#    systemd-timesyncd enabled). This is the step that makes
+#    the disk portable.
+#      y-cluster prepare-export --context=$NAME
+#
+# 7. Export to GCE custom-image format. Produces
+#    <bundle-dir>/<name>.tar.gz containing exactly disk.raw.
+#      y-cluster export --context=$NAME --format=gcp-tar <bundle-dir>
+#
+# 8. Upload to GCS. Bucket created on first run with
+#    uniform-access mode.
+#      gcloud storage cp <bundle-dir>/<name>.tar.gz \
+#        gs://<project>-appliance-images/<image-name>.tar.gz
+#
+# 9. Create custom image from the GCS object. Direct create
+#    (no managed conversion job).
+#      gcloud compute images create <image-name> \
+#        --source-uri=gs://<project>-appliance-images/<image-name>.tar.gz
+#
+# 10. Ensure firewall opens public ports. Idempotent.
+#      gcloud compute firewall-rules create y-cluster-appliance-public ...
+#
+# 11. Create VM from the image, tagged for the firewall rule.
+#      gcloud compute instances create <vm-name> \
+#        --image=<image-name> --tags=y-cluster-appliance ...
+#
+# 12. Wait for ssh + probe HTTP. The disk we just built is the
+#     disk now booting; if smoketest passes here, it's the same
+#     smoketest that passed locally.
+#
+# 13. Teardown: delete the VM, the image, the GCS object, the
+#     local cluster. The e2e is the thing that proves the
+#     contract; we don't leave artefacts behind.
+#
+# Re-run safety: every step is idempotent. Running this twice
+# in a row produces the same result; partial-failure re-runs
+# pick up where the previous left off (fresh teardown of any
+# leftover server / image / cluster on entry).
+#
+# This script is the proof. The interactive variant is
+# scripts/appliance-qemu-to-gcp.sh -- same flow but with
+# operator prompts at the export and GCP-write boundaries.
+
+[ -z "$DEBUG" ] || set -x
+set -eo pipefail
+
+YHELP='e2e-appliance-qemu-to-gcp.sh - canonical SRE workflow: provision -> install -> verify -> prepare-export -> export gcp-tar -> upload -> image -> instance -> probe -> teardown
+
+Usage: e2e-appliance-qemu-to-gcp.sh
+
+Environment:
+  GCP_PROJECT       GCP project (set in .env or shell env; required)
+  GCP_REGION        GCP region (default: europe-north2)
+  GCP_ZONE          GCP zone (default: europe-north2-a)
+  GCP_BUCKET        GCS bucket (default: <GCP_PROJECT>-appliance-images)
+  GCP_MACHINE_TYPE  Machine type (default: e2-medium)
+  GCP_KEY           Service account JSON (set in .env or shell env; required)
+  NAME              Cluster + VM name (default: appliance-gcp-e2e)
+  APP_HTTP_PORT     Override host port for guest 80 during the local qemu stage
+  APP_HTTPS_PORT    Override host port for guest 443 during the local qemu stage
+  APP_API_PORT      Override host port for guest 6443 during the local qemu stage
+  APP_SSH_PORT      Override host port for guest 22 during the local qemu stage
+  KEEP              Set to skip teardown for diagnosis (default: tear down on exit)
+  DEBUG             Set non-empty for bash trace
+
+Dependencies:
+  go, qemu-system-x86_64, qemu-img, kubectl, ssh, ssh-keygen, curl,
+  virt-sysprep, gcloud, jq
+'
+
+case "${1:-}" in
+  help) echo "$YHELP"; exit 0 ;;
+  --help) echo "$YHELP"; exit 0 ;;
+  -h) echo "$YHELP"; exit 0 ;;
+esac
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ -f "$REPO_ROOT/.env" ]]; then
+  set -o allexport; . "$REPO_ROOT/.env"; set +o allexport
+fi
+
+: "${GCP_PROJECT:?set GCP_PROJECT in .env or shell env}"
+: "${GCP_KEY:?set GCP_KEY in .env or shell env}"
+
+GCP_REGION="${GCP_REGION:-europe-north2}"
+GCP_ZONE="${GCP_ZONE:-europe-north2-a}"
+GCP_BUCKET="${GCP_BUCKET:-${GCP_PROJECT}-appliance-images}"
+GCP_MACHINE_TYPE="${GCP_MACHINE_TYPE:-e2-medium}"
+
+NAME="${NAME:-appliance-gcp-e2e}"
+
+Y_CLUSTER="${Y_CLUSTER:-$REPO_ROOT/dist/y-cluster}"
+CACHE_DIR="${CACHE_DIR:-$HOME/.cache/y-cluster-qemu}"
+CFG_DIR="$HOME/.cache/y-cluster-appliance-build/$NAME"
+BUNDLE_DIR=$(mktemp -d -p "$REPO_ROOT/dist" "appliance-gcp-e2e.XXXXXX" 2>/dev/null \
+  || mktemp -d -p /tmp "appliance-gcp-e2e.XXXXXX")
+
+IMAGE_NAME="$NAME-$(date -u +%Y%m%d-%H%M%S)"
+VM_NAME="$NAME"
+
+stage() { printf '\n=== %s ===\n' "$*"; }
+
+teardown() {
+  set +e
+  if [[ -n "${KEEP:-}" ]]; then
+    echo
+    echo "KEEP set; preserving artefacts for diagnosis:"
+    echo "  local cluster: $Y_CLUSTER teardown -c $CFG_DIR"
+    echo "  GCE VM:        gcloud compute instances delete $VM_NAME --project=$GCP_PROJECT --zone=$GCP_ZONE"
+    echo "  GCE image:     gcloud compute images delete $IMAGE_NAME --project=$GCP_PROJECT"
+    echo "  GCS object:    gcloud storage rm gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz --project=$GCP_PROJECT"
+    echo "  bundle:        $BUNDLE_DIR"
+    return
+  fi
+  stage "teardown"
+  gcloud compute instances delete "$VM_NAME" \
+    --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet 2>/dev/null # y-script-lint:disable=or-true # idempotent cleanup: missing VM is not an error
+  gcloud compute images delete "$IMAGE_NAME" \
+    --project="$GCP_PROJECT" --quiet 2>/dev/null # y-script-lint:disable=or-true # idempotent cleanup: missing image is not an error
+  gcloud storage rm "gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" \
+    --project="$GCP_PROJECT" 2>/dev/null # y-script-lint:disable=or-true # idempotent cleanup: missing object is not an error
+  "$Y_CLUSTER" teardown -c "$CFG_DIR" 2>/dev/null # y-script-lint:disable=or-true # idempotent cleanup: missing cluster is not an error
+  rm -rf "$BUNDLE_DIR"
+}
+trap teardown EXIT
+
+for tool in go qemu-system-x86_64 qemu-img kubectl ssh ssh-keygen curl virt-sysprep gcloud jq; do
+  command -v "$tool" >/dev/null \
+    || { echo "missing required tool: $tool" >&2; exit 1; }
+done
+
+if [[ ! -f "$GCP_KEY" ]]; then
+  echo "missing GCP key: $GCP_KEY" >&2
+  echo "create it with: scripts/gcp-bootstrap-credentials.sh" >&2
+  exit 1
+fi
+# Same schema check as the interactive flow -- mismatch on a
+# truncated / wrong-shape JSON should fail fast here rather
+# than two gcloud calls in.
+if ! jq -e 'all(.type == "service_account"; .) and (.project_id // empty | length > 0) and (.client_email // empty | length > 0) and (.private_key // empty | length > 0)' "$GCP_KEY" >/dev/null 2>&1; then
+  echo "GCP key at $GCP_KEY is missing required fields" >&2
+  echo "  expected JSON with: type=service_account, project_id, client_email, private_key" >&2
+  exit 1
+fi
+export GOOGLE_APPLICATION_CREDENTIALS="$GCP_KEY"
+
+# Acknowledge parallel composite uploads up front (silences
+# the WARNING stanza gcloud would otherwise emit on every
+# `storage cp` for files >150 MiB).
+export CLOUDSDK_STORAGE_PARALLEL_COMPOSITE_UPLOAD_ENABLED=True
+
+# prepare-export drives virt-sysprep / virt-customize (libguestfs), which
+# needs a host kernel image it can read; on stock Ubuntu /boot/vmlinuz-*
+# is readable by root only.
+if ! [ -r /boot/vmlinuz-"$(uname -r)" ]; then
+  cat >&2 <<EOF
+/boot/vmlinuz-$(uname -r) is not readable by your user.
+virt-sysprep (and therefore prepare-export) needs a readable kernel image.
+One-off fix: sudo chmod 0644 /boot/vmlinuz-*
+EOF
+  exit 1
+fi
+
+# === 1. Build dev binary ===
+stage "building dev binary -> $Y_CLUSTER"
+mkdir -p "$(dirname "$Y_CLUSTER")"
+( cd "$REPO_ROOT" && go build -o "$Y_CLUSTER" ./cmd/y-cluster )
+
+# === 2.
Provision local qemu === +mkdir -p "$CFG_DIR" +# YAML emission omits any port the operator didn't override, letting +# y-cluster's Go binary apply its own defaults (sshPort=2222, +# portForwards={6443:6443, 80:80, 443:443}). +{ + echo "provider: qemu" + echo "name: $NAME" + echo "context: $NAME" + [ -n "${APP_SSH_PORT:-}" ] && printf 'sshPort: "%s"\n' "$APP_SSH_PORT" + echo 'memory: "4096"' + echo 'cpus: "2"' + echo 'diskSize: "40G"' + if [ -n "${APP_HTTP_PORT:-}" ] || [ -n "${APP_HTTPS_PORT:-}" ] || [ -n "${APP_API_PORT:-}" ]; then + echo "portForwards:" + [ -n "${APP_API_PORT:-}" ] && printf ' - host: "%s"\n guest: "6443"\n' "$APP_API_PORT" + [ -n "${APP_HTTP_PORT:-}" ] && printf ' - host: "%s"\n guest: "80"\n' "$APP_HTTP_PORT" + [ -n "${APP_HTTPS_PORT:-}" ] && printf ' - host: "%s"\n guest: "443"\n' "$APP_HTTPS_PORT" + fi +} > "$CFG_DIR/y-cluster-provision.yaml" + +stage "tearing down any leftover $NAME cluster" +"$Y_CLUSTER" teardown -c "$CFG_DIR" || true # y-script-lint:disable=or-true # idempotent re-entry: missing cluster is not an error + +stage "provisioning $NAME (k3s + Envoy Gateway)" +"$Y_CLUSTER" provision -c "$CFG_DIR" + +# === 3. Install canonical workloads === +stage "installing echo workload" +"$Y_CLUSTER" echo render \ + | kubectl --context="$NAME" apply --server-side --field-manager=customer-install -f - +kubectl --context="$NAME" -n y-cluster wait \ + --for=condition=Available deployment/echo --timeout=180s + +stage "installing VersityGW StatefulSet via yconverge" +"$Y_CLUSTER" yconverge --context="$NAME" \ + -k "$REPO_ROOT/testdata/appliance-stateful/base" + +# === 4. Local smoketest === +stage "local smoketest: echo + s3" +probe_local() { + local what=$1 url=$2 attempts=${3:-30} + for i in $(seq 1 "$attempts"); do + if curl -fsS --max-time 8 -o /dev/null -w " $what HTTP %{http_code}\n" "$url"; then + return 0 + fi + echo " $what attempt $i/$attempts" + sleep 5 + done + return 1 +} +probe_local echo "http://127.0.0.1:${APP_HTTP_PORT:-80}/q/envoy/echo" +probe_local s3 "http://127.0.0.1:${APP_HTTP_PORT:-80}/s3/health" + +# === 5. Stop === +stage "stopping cluster" +"$Y_CLUSTER" stop --context="$NAME" + +# === 6. prepare-export === +stage "prepare-export" +"$Y_CLUSTER" prepare-export --context="$NAME" + +# === 7. Export to GCE-tar === +stage "exporting GCE-custom-image tarball -> $BUNDLE_DIR" +# y-cluster export refuses non-empty bundle dirs; the mktemp -d +# above created an empty dir we own, so a fresh re-run is fine. +# On retry-after-failure paths the dir might have content from +# the previous attempt, so we wipe + let export recreate. +rm -rf "$BUNDLE_DIR" +"$Y_CLUSTER" export --context="$NAME" --format=gcp-tar "$BUNDLE_DIR" +TARBALL="$BUNDLE_DIR/$NAME.tar.gz" +echo " size: $(stat -c '%s' "$TARBALL" | numfmt --to=iec-i --suffix=B 2>/dev/null || stat -c '%s' "$TARBALL")" + +# === 8. Upload to GCS === +stage "ensuring bucket gs://$GCP_BUCKET ($GCP_REGION)" +if ! gcloud storage buckets describe "gs://$GCP_BUCKET" --project="$GCP_PROJECT" >/dev/null 2>&1; then + gcloud storage buckets create "gs://$GCP_BUCKET" \ + --project="$GCP_PROJECT" \ + --location="$GCP_REGION" \ + --uniform-bucket-level-access +fi + +stage "uploading -> gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" +gcloud storage cp "$TARBALL" "gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" --project="$GCP_PROJECT" + +# === 9. 
Create custom image ===
+stage "creating GCE custom image $IMAGE_NAME"
+gcloud compute images create "$IMAGE_NAME" \
+  --project="$GCP_PROJECT" \
+  --source-uri="gs://$GCP_BUCKET/$IMAGE_NAME.tar.gz" \
+  --family=y-cluster-appliance \
+  --architecture=X86_64 \
+  >/dev/null
+
+# === 10. Firewall (idempotent) ===
+FIREWALL_RULE="y-cluster-appliance-public"
+stage "ensuring firewall rule $FIREWALL_RULE"
+if ! gcloud compute firewall-rules describe "$FIREWALL_RULE" --project="$GCP_PROJECT" >/dev/null 2>&1; then
+  gcloud compute firewall-rules create "$FIREWALL_RULE" \
+    --project="$GCP_PROJECT" \
+    --direction=INGRESS \
+    --network=default \
+    --action=ALLOW \
+    --rules=tcp:80,tcp:443 \
+    --target-tags=y-cluster-appliance \
+    --source-ranges=0.0.0.0/0 \
+    >/dev/null
+fi
+
+# === 11. Create VM ===
+stage "creating $VM_NAME ($GCP_MACHINE_TYPE in $GCP_ZONE)"
+if gcloud compute instances describe "$VM_NAME" --project="$GCP_PROJECT" --zone="$GCP_ZONE" >/dev/null 2>&1; then
+  gcloud compute instances delete "$VM_NAME" \
+    --project="$GCP_PROJECT" --zone="$GCP_ZONE" --quiet >/dev/null
+fi
+# Boot disk must be at least as large as the exported image's virtual
+# disk (the local provision uses 40G), or GCE rejects the create.
+gcloud compute instances create "$VM_NAME" \
+  --project="$GCP_PROJECT" \
+  --zone="$GCP_ZONE" \
+  --machine-type="$GCP_MACHINE_TYPE" \
+  --image="$IMAGE_NAME" \
+  --image-project="$GCP_PROJECT" \
+  --boot-disk-size=40GB \
+  --tags=y-cluster-appliance \
+  >/dev/null
+PUBLIC_IP=$(gcloud compute instances describe "$VM_NAME" \
+  --project="$GCP_PROJECT" \
+  --zone="$GCP_ZONE" \
+  --format='get(networkInterfaces[0].accessConfigs[0].natIP)')
+echo "  public ip: $PUBLIC_IP"
+
+# === 12. Wait for ssh + probe HTTP ===
+SSH_KEY="$CACHE_DIR/$NAME-ssh"
+SSH_OPTS="-i $SSH_KEY -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5"
+
+echo "  waiting for ssh on $PUBLIC_IP:22 (cloud-init can take 30-90s on first boot)"
+ssh_up=0
+for i in $(seq 1 60); do
+  # shellcheck disable=SC2086
+  if ssh $SSH_OPTS ystack@"$PUBLIC_IP" 'true' 2>/dev/null; then
+    echo "  ssh up after $i attempt(s)"
+    ssh_up=1
+    break
+  fi
+  echo "  ssh attempt $i/60: not yet"
+  sleep 5
+done
+[[ $ssh_up -eq 1 ]] || { echo "ssh never came up on $PUBLIC_IP" >&2; exit 1; }
+
+probe_remote() {
+  local what=$1 url=$2 attempts=${3:-60}
+  for i in $(seq 1 "$attempts"); do
+    if curl -fsS --max-time 8 -o /dev/null -w "  $what HTTP %{http_code}\n" "$url"; then
+      return 0
+    fi
+    echo "  $what attempt $i/$attempts"
+    sleep 10
+  done
+  return 1
+}
+
+stage "probing http://$PUBLIC_IP -- echo + s3 (same routes the local cluster served)"
+if probe_remote echo "http://$PUBLIC_IP/q/envoy/echo" \
+  && probe_remote s3 "http://$PUBLIC_IP/s3/health"; then
+  echo
+  echo "================================================================"
+  echo "PASS: appliance-qemu-to-gcp e2e."
+  echo
+  echo "Local-built disk booted in GCP and served the same routes that"
+  echo "the local qemu served. The appliance contract holds."
+  echo
+  echo "  Public IP:  $PUBLIC_IP"
+  echo "  SSH:        ssh -i $SSH_KEY ystack@$PUBLIC_IP"
+  echo "  echo:       http://$PUBLIC_IP/q/envoy/echo"
+  echo "  s3 health:  http://$PUBLIC_IP/s3/health"
+  echo "================================================================"
+  exit 0
+fi
+
+echo >&2
+echo "remote probes never answered; diagnostics below. NOTE: the EXIT trap tears the instance down -- set KEEP=1 to preserve it for inspection:" >&2
+# shellcheck disable=SC2086
+ssh $SSH_OPTS ystack@"$PUBLIC_IP" \
+  'sudo systemctl is-active k3s; sudo k3s kubectl get pods -A 2>&1 | head -30' >&2 \
+  || true # y-script-lint:disable=or-true # diagnostic best-effort
+exit 1
diff --git a/scripts/gcp-bootstrap-credentials.sh b/scripts/gcp-bootstrap-credentials.sh
new file mode 100755
index 0000000..fdd29c7
--- /dev/null
+++ b/scripts/gcp-bootstrap-credentials.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+# Bootstrap a service-account JSON key for automation to use
+# against a GCP project (typically your y-cluster appliance QA
+# project; see .env.example for the operator-local default).
+#
+# Run this on a machine where you're already gcloud-logged-in
+# as a project Owner (or Editor with IAM admin). It will:
+#  1. Verify your active gcloud account can act on the project.
+#  2. Enable the Compute / Storage APIs the appliance-qemu-to-gcp
+#     flow needs. (No Cloud Build: we convert qcow2 -> raw -> tar
+#     locally and use `images create --source-uri=gs://...`, which
+#     is a direct image create with no managed conversion job.)
+#  3. Create (or reuse) a service account named
+#     <SA_NAME>@<GCP_PROJECT>.iam.gserviceaccount.com.
+#  4. Grant it roles/owner on the project. (QA project; broad
+#     role keeps the bootstrap simple. Tighten later if QA gets
+#     reused for non-QA assets.)
+#  5. Generate a JSON key for the service account.
+#  6. Print the JSON between unmistakable BEGIN/END markers so
+#     you can copy-paste from your terminal scrollback to the
+#     machine that needs the credentials. The key is also left
+#     on disk at $KEY_FILE in case you'd rather scp it.
+#
+# After copying: on the other machine, save the JSON between
+# the markers (NOT the markers themselves) to a file, chmod
+# 600 it, and point GCP_KEY in $REPO_ROOT/.env at it. The
+# appliance scripts read GCP_KEY from .env.
+
+[ -z "$DEBUG" ] || set -x
+set -eo pipefail
+
+YHELP='gcp-bootstrap-credentials.sh - create + grant + key a service account for the y-cluster appliance flow, then print the JSON for cross-machine copy-paste
+
+Usage: gcp-bootstrap-credentials.sh
+
+Environment:
+  GCP_PROJECT  GCP project (set in .env or shell env; required)
+  SA_NAME      Service account local part (default: y-cluster-appliance)
+  KEY_FILE     Where to write the JSON key on this machine
+               (default: ./y-cluster-gcp-key.json)
+  DEBUG        Set non-empty for bash trace
+
+Dependencies:
+  gcloud (logged in as a Project Owner or equivalent)
+'
+
+case "${1:-}" in
+  help) echo "$YHELP"; exit 0 ;;
+  --help) echo "$YHELP"; exit 0 ;;
+  -h) echo "$YHELP"; exit 0 ;;
+esac
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ -f "$REPO_ROOT/.env" ]]; then
+  set -o allexport; . "$REPO_ROOT/.env"; set +o allexport
+fi
+
+: "${GCP_PROJECT:?set GCP_PROJECT in .env or shell env}"
+PROJECT_ID="$GCP_PROJECT"
+SA_NAME="${SA_NAME:-y-cluster-appliance}"
+SA_EMAIL="${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
+KEY_FILE="${KEY_FILE:-./y-cluster-gcp-key.json}"
+
+command -v gcloud >/dev/null || { echo "gcloud not found on PATH" >&2; exit 1; }
+
+stage() { printf '\n=== %s ===\n' "$*"; }
+
+# 1. Verify caller is logged in and can see the project.
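+#    (If you want to check your own roles by hand first, something like
+#     the following works -- substitute your own account in the member
+#     filter; purely illustrative, the script itself only checks that
+#     `projects describe` succeeds:
+#       gcloud projects get-iam-policy "$PROJECT_ID" \
+#         --flatten="bindings[].members" \
+#         --filter="bindings.members:user:you@example.com" \
+#         --format="value(bindings.role)")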
+stage "verifying gcloud auth + project access ($PROJECT_ID)"
+ACTIVE=$(gcloud auth list --filter=status:ACTIVE --format="value(account)" 2>/dev/null || true) # y-script-lint:disable=or-true # gcloud returns nonzero when no active account; we surface our own error below
+if [[ -z "$ACTIVE" ]]; then
+  echo "no active gcloud account; run: gcloud auth login" >&2
+  exit 1
+fi
+echo "  active account: $ACTIVE"
+gcloud projects describe "$PROJECT_ID" --format="value(projectId)" >/dev/null \
+  || { echo "cannot read project $PROJECT_ID with $ACTIVE" >&2; exit 1; }
+
+# 2. Enable required APIs. Idempotent: gcloud reports the
+# already-enabled ones as no-ops.
+stage "enabling APIs (compute, storage)"
+gcloud services enable \
+  compute.googleapis.com \
+  storage.googleapis.com \
+  --project="$PROJECT_ID"
+
+# 3. Create the service account (idempotent: skip if it
+# exists). gcloud doesn't ship a clean "create or skip", so
+# we probe first.
+stage "creating service account $SA_EMAIL (idempotent)"
+if gcloud iam service-accounts describe "$SA_EMAIL" \
+  --project="$PROJECT_ID" >/dev/null 2>&1; then
+  echo "  already exists, reusing"
+else
+  gcloud iam service-accounts create "$SA_NAME" \
+    --display-name="y-cluster appliance automation" \
+    --description="Used by scripts/appliance-qemu-to-gcp.sh to upload custom images and provision VMs in $PROJECT_ID" \
+    --project="$PROJECT_ID"
+fi
+
+# 4. Grant roles/owner on the project. QA project; broad role
+# is intentional and matches the project's stated purpose. If
+# this account ever gets reused for non-QA assets, tighten to
+# the union of: compute.admin, storage.admin,
+# iam.serviceAccountUser.
+stage "granting roles/owner on $PROJECT_ID to $SA_EMAIL"
+gcloud projects add-iam-policy-binding "$PROJECT_ID" \
+  --member="serviceAccount:$SA_EMAIL" \
+  --role="roles/owner" \
+  --project="$PROJECT_ID" \
+  --condition=None \
+  >/dev/null
+
+# 5. Mint a fresh JSON key. Each invocation creates a new key.
+# GCP allows up to 10 keys per service account; if the operator
+# is rotating, they can `gcloud iam service-accounts keys list`
+# and delete the stale ones with `keys delete`.
+stage "minting JSON key -> $KEY_FILE"
+rm -f "$KEY_FILE"
+gcloud iam service-accounts keys create "$KEY_FILE" \
+  --iam-account="$SA_EMAIL" \
+  --project="$PROJECT_ID"
+chmod 600 "$KEY_FILE"
+
+# 6. Print the JSON between markers for clipboard-friendly
+# copy. Markers are exact strings the destination machine can
+# grep for if they want to extract programmatically.
+echo
+echo "================================================================"
+echo "JSON key for $SA_EMAIL"
+echo "Project: $PROJECT_ID"
+echo
+echo "On the destination machine, save the lines BETWEEN the"
+echo "----- BEGIN ... ----- and ----- END ... ----- markers"
+echo "(NOT the markers themselves) to a file, then:"
+echo "  chmod 600 <file>"
+echo "  set GCP_KEY=<file> in \$REPO_ROOT/.env"
+echo "================================================================"
+echo
+echo "----- BEGIN GCP SERVICE ACCOUNT KEY ($SA_EMAIL) -----"
+cat "$KEY_FILE"
+echo
+echo "----- END GCP SERVICE ACCOUNT KEY ($SA_EMAIL) -----"
+echo
+echo "Local copy of the key (kept for scp / re-paste): $KEY_FILE"
+echo "To revoke this key later:"
+echo "  gcloud iam service-accounts keys list --iam-account=$SA_EMAIL --project=$PROJECT_ID"
+echo "  gcloud iam service-accounts keys delete <key-id> --iam-account=$SA_EMAIL --project=$PROJECT_ID"
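+
+# For reference -- purely illustrative, the transcript filename is the
+# operator's choice -- the destination machine can pull the JSON out of
+# a saved copy of this output using the markers themselves:
+#   sed -n '/^----- BEGIN GCP SERVICE ACCOUNT KEY/,/^----- END GCP SERVICE ACCOUNT KEY/p' transcript.txt \
+#     | sed '1d;$d' > gcp-appliance.key.json
+#   chmod 600 gcp-appliance.key.json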