From 5f431c287bfd40a5c6ba6699efdb8c0b4320fc81 Mon Sep 17 00:00:00 2001 From: Arjun Komath Date: Sat, 9 May 2026 09:12:59 +1000 Subject: [PATCH 1/3] Upgrade Inngest SDK --- web/actions/backups.ts | 19 ++- web/actions/builds.ts | 37 +++-- web/actions/migrations.ts | 15 +-- web/actions/projects.ts | 19 +-- web/app/api/v1/agent/backup/complete/route.ts | 19 ++- web/app/api/v1/agent/backup/failed/route.ts | 19 ++- .../api/v1/agent/builds/[id]/status/route.ts | 6 +- .../api/v1/agent/restore/complete/route.ts | 37 +++-- .../api/v1/agent/work-queue/complete/route.ts | 19 ++- web/app/api/webhooks/github/route.ts | 10 +- web/lib/agent-status.ts | 64 ++++----- web/lib/inngest/client.ts | 8 +- web/lib/inngest/events/index.ts | 40 ++++++ web/lib/inngest/functions/backup-workflow.ts | 39 +++--- .../functions/build-trigger-workflow.ts | 12 +- web/lib/inngest/functions/build-workflow.ts | 15 ++- web/lib/inngest/functions/crons.ts | 50 +++++-- .../inngest/functions/migration-workflow.ts | 127 ++++++++++-------- .../inngest/functions/on-deployment-failed.ts | 12 +- .../functions/restore-trigger-workflow.ts | 12 +- web/lib/inngest/functions/restore-workflow.ts | 43 +++--- web/lib/inngest/functions/rollout-workflow.ts | 11 +- web/package.json | 4 +- web/pnpm-lock.yaml | 30 ++--- 24 files changed, 360 insertions(+), 307 deletions(-) diff --git a/web/actions/backups.ts b/web/actions/backups.ts index d9cae6c..b027807 100644 --- a/web/actions/backups.ts +++ b/web/actions/backups.ts @@ -7,6 +7,7 @@ import { getBackupStorageConfig } from "@/db/queries"; import { servers, volumeBackups } from "@/db/schema"; import { triggerBackup } from "@/lib/backups/trigger-backup"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; import { deleteFromS3 } from "@/lib/s3"; export async function createBackup( @@ -20,15 +21,14 @@ export async function createBackup( backupTypeOverride, }); - await inngest.send({ - name: "backup/started", - data: { + await 
inngest.send( + inngestEvents.backupStarted.create({ backupId: result.backupId, serviceId, volumeId, serverId: result.serverId, - }, - }); + }), + ); revalidatePath(`/dashboard/projects`); return { success: true, backupId: result.backupId }; @@ -59,14 +59,13 @@ export async function restoreBackup( backupId: string, targetServerId?: string, ) { - await inngest.send({ - name: "restore/trigger", - data: { + await inngest.send( + inngestEvents.restoreTrigger.create({ serviceId, backupId, targetServerId, - }, - }); + }), + ); revalidatePath(`/dashboard/projects`); return { success: true }; diff --git a/web/actions/builds.ts b/web/actions/builds.ts index 9c6e9db..ee2f6de 100644 --- a/web/actions/builds.ts +++ b/web/actions/builds.ts @@ -4,6 +4,7 @@ import { eq, desc } from "drizzle-orm"; import { db } from "@/db"; import { builds, githubRepos, services } from "@/db/schema"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; export async function cancelBuild(buildId: string) { const [build] = await db.select().from(builds).where(eq(builds.id, buildId)); @@ -28,13 +29,12 @@ export async function cancelBuild(buildId: string) { .set({ status: "cancelled", completedAt: new Date() }) .where(eq(builds.id, buildId)); - await inngest.send({ - name: "build/cancelled", - data: { + await inngest.send( + inngestEvents.buildCancelled.create({ buildId, buildGroupId: build.buildGroupId, - }, - }); + }), + ); return { success: true }; } @@ -50,9 +50,8 @@ export async function retryBuild(buildId: string) { throw new Error(`Cannot retry build in ${build.status} status`); } - await inngest.send({ - name: "build/trigger", - data: { + await inngest.send( + inngestEvents.buildTrigger.create({ serviceId: build.serviceId, trigger: "manual", githubRepoId: build.githubRepoId ?? undefined, @@ -60,8 +59,8 @@ export async function retryBuild(buildId: string) { commitMessage: build.commitMessage ?? 
"Retry build", branch: build.branch, author: build.author ?? undefined, - }, - }); + }), + ); return { success: true }; } @@ -101,9 +100,8 @@ export async function triggerBuild( .orderBy(desc(builds.createdAt)) .limit(1); - await inngest.send({ - name: "build/trigger", - data: { + await inngest.send( + inngestEvents.buildTrigger.create({ serviceId, trigger, githubRepoId: githubRepo.id, @@ -111,8 +109,8 @@ export async function triggerBuild( commitMessage: latestBuild?.commitMessage || triggerMessage, branch: latestBuild?.branch || githubRepo.deployBranch || "main", author: latestBuild?.author ?? undefined, - }, - }); + }), + ); return { success: true }; } @@ -121,16 +119,15 @@ export async function triggerBuild( throw new Error("No GitHub repository linked to this service"); } - await inngest.send({ - name: "build/trigger", - data: { + await inngest.send( + inngestEvents.buildTrigger.create({ serviceId, trigger, commitSha: "HEAD", commitMessage: triggerMessage, branch: service.githubBranch || "main", - }, - }); + }), + ); return { success: true }; } diff --git a/web/actions/migrations.ts b/web/actions/migrations.ts index 21c24aa..10ea9bf 100644 --- a/web/actions/migrations.ts +++ b/web/actions/migrations.ts @@ -7,6 +7,7 @@ import { getBackupStorageConfig } from "@/db/queries"; import { detectDatabaseType } from "@/lib/database-utils"; import { revalidatePath } from "next/cache"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; export async function startMigration( serviceId: string, @@ -86,9 +87,8 @@ export async function startMigration( }) .where(eq(services.id, serviceId)); - await inngest.send({ - name: "migration/started", - data: { + await inngest.send( + inngestEvents.migrationStarted.create({ serviceId, targetServerId, sourceServerId: deployment.serverId, @@ -96,18 +96,15 @@ export async function startMigration( sourceContainerId: deployment.containerId, volumes: volumes.map((v) => ({ id: v.id, name: 
v.name })), isDatabase, - }, - }); + }), + ); revalidatePath(`/dashboard/projects`); return { success: true }; } export async function cancelMigration(serviceId: string) { - await inngest.send({ - name: "migration/cancelled", - data: { serviceId }, - }); + await inngest.send(inngestEvents.migrationCancelled.create({ serviceId })); await db .update(services) diff --git a/web/actions/projects.ts b/web/actions/projects.ts index 510bfa3..abeff3c 100644 --- a/web/actions/projects.ts +++ b/web/actions/projects.ts @@ -39,6 +39,7 @@ import { allocatePort } from "@/lib/port-allocation"; import cronstrue from "cronstrue"; import { startMigration } from "./migrations"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; function isValidImageReferencePart(reference: string): boolean { const tagPattern = /^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$/; @@ -624,13 +625,12 @@ export async function deployService(serviceId: string) { currentStage: "queued", }); - await inngest.send({ - name: "rollout/created", - data: { + await inngest.send( + inngestEvents.rolloutCreated.create({ rolloutId, serviceId, - }, - }); + }), + ); return { rolloutId }; } @@ -978,10 +978,11 @@ export async function abortRollout(serviceId: string) { return { success: false, error: "No in-progress rollout found" }; } - await inngest.send({ - name: "rollout/cancelled", - data: { rolloutId: inProgressRollout.id }, - }); + await inngest.send( + inngestEvents.rolloutCancelled.create({ + rolloutId: inProgressRollout.id, + }), + ); await db .update(deployments) diff --git a/web/app/api/v1/agent/backup/complete/route.ts b/web/app/api/v1/agent/backup/complete/route.ts index aaf64fc..fa4d026 100644 --- a/web/app/api/v1/agent/backup/complete/route.ts +++ b/web/app/api/v1/agent/backup/complete/route.ts @@ -4,6 +4,7 @@ import { volumeBackups } from "@/db/schema"; import { eq, and } from "drizzle-orm"; import { verifyAgentRequest } from "@/lib/agent-auth"; import { inngest } from 
"@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; import { revalidatePath } from "next/cache"; export async function POST(request: NextRequest) { @@ -55,26 +56,24 @@ export async function POST(request: NextRequest) { revalidatePath("/dashboard/projects"); - await inngest.send({ - name: "backup/completed", - data: { + await inngest.send( + inngestEvents.backupCompleted.create({ backupId, volumeId: backup.volumeId, serviceId: backup.serviceId, checksum, sizeBytes, isMigrationBackup: backup.isMigrationBackup ?? false, - }, - }); + }), + ); if (backup.isMigrationBackup) { - await inngest.send({ - name: "migration/backup-completed", - data: { + await inngest.send( + inngestEvents.migrationBackupCompleted.create({ backupId, serviceId: backup.serviceId, - }, - }); + }), + ); } return NextResponse.json({ ok: true }); diff --git a/web/app/api/v1/agent/backup/failed/route.ts b/web/app/api/v1/agent/backup/failed/route.ts index f43cf67..1fcb3b4 100644 --- a/web/app/api/v1/agent/backup/failed/route.ts +++ b/web/app/api/v1/agent/backup/failed/route.ts @@ -4,6 +4,7 @@ import { volumeBackups } from "@/db/schema"; import { eq, and } from "drizzle-orm"; import { verifyAgentRequest } from "@/lib/agent-auth"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; import { revalidatePath } from "next/cache"; export async function POST(request: NextRequest) { @@ -50,26 +51,24 @@ export async function POST(request: NextRequest) { revalidatePath("/dashboard/projects"); - await inngest.send({ - name: "backup/failed", - data: { + await inngest.send( + inngestEvents.backupFailed.create({ backupId, volumeId: backup.volumeId, serviceId: backup.serviceId, error: error || "Unknown error", isMigrationBackup: backup.isMigrationBackup ?? 
false, - }, - }); + }), + ); if (backup.isMigrationBackup) { - await inngest.send({ - name: "migration/backup-failed", - data: { + await inngest.send( + inngestEvents.migrationBackupFailed.create({ backupId, serviceId: backup.serviceId, error: error || "Unknown error", - }, - }); + }), + ); } return NextResponse.json({ ok: true }); diff --git a/web/app/api/v1/agent/builds/[id]/status/route.ts b/web/app/api/v1/agent/builds/[id]/status/route.ts index 6642ad3..f0e0a2f 100644 --- a/web/app/api/v1/agent/builds/[id]/status/route.ts +++ b/web/app/api/v1/agent/builds/[id]/status/route.ts @@ -14,6 +14,7 @@ import { updateGitHubDeploymentStatus } from "@/lib/github"; import { sendBuildFailureAlert } from "@/lib/email"; import { enqueueWork } from "@/lib/work-queue"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; type StatusUpdate = { status: "cloning" | "building" | "pushing" | "completed" | "failed"; @@ -30,10 +31,7 @@ type BuildCompletedEventData = { }; async function sendBuildCompletedEvent(data: BuildCompletedEventData) { - await inngest.send({ - name: "build/completed", - data, - }); + await inngest.send(inngestEvents.buildCompleted.create(data)); } export async function POST( diff --git a/web/app/api/v1/agent/restore/complete/route.ts b/web/app/api/v1/agent/restore/complete/route.ts index 60cbca3..44c3128 100644 --- a/web/app/api/v1/agent/restore/complete/route.ts +++ b/web/app/api/v1/agent/restore/complete/route.ts @@ -4,6 +4,7 @@ import { volumeBackups } from "@/db/schema"; import { eq } from "drizzle-orm"; import { verifyAgentRequest } from "@/lib/agent-auth"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; import { revalidatePath } from "next/cache"; export async function POST(request: NextRequest) { @@ -46,46 +47,42 @@ export async function POST(request: NextRequest) { revalidatePath("/dashboard/projects"); if (success) { - await inngest.send({ - name: 
"restore/completed", - data: { + await inngest.send( + inngestEvents.restoreCompleted.create({ backupId, volumeId: backup.volumeId, serviceId: backup.serviceId, isMigrationRestore: isMigration, - }, - }); + }), + ); if (isMigration) { - await inngest.send({ - name: "migration/restore-completed", - data: { + await inngest.send( + inngestEvents.migrationRestoreCompleted.create({ backupId, serviceId: backup.serviceId, - }, - }); + }), + ); } } else { - await inngest.send({ - name: "restore/failed", - data: { + await inngest.send( + inngestEvents.restoreFailed.create({ backupId, volumeId: backup.volumeId, serviceId: backup.serviceId, error: error || "Restore failed", isMigrationRestore: isMigration, - }, - }); + }), + ); if (isMigration) { - await inngest.send({ - name: "migration/restore-failed", - data: { + await inngest.send( + inngestEvents.migrationRestoreFailed.create({ backupId, serviceId: backup.serviceId, error: error || "Restore failed", - }, - }); + }), + ); } } diff --git a/web/app/api/v1/agent/work-queue/complete/route.ts b/web/app/api/v1/agent/work-queue/complete/route.ts index 83046e5..c82cc0c 100644 --- a/web/app/api/v1/agent/work-queue/complete/route.ts +++ b/web/app/api/v1/agent/work-queue/complete/route.ts @@ -4,6 +4,7 @@ import { workQueue } from "@/db/schema"; import { eq, and } from "drizzle-orm"; import { verifyAgentRequest } from "@/lib/agent-auth"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; export async function POST(request: NextRequest) { const body = await request.text(); @@ -58,24 +59,22 @@ export async function POST(request: NextRequest) { if (data.status === "completed") { if (payload.serviceId && payload.finalImageUri) { - await inngest.send({ - name: "manifest/completed", - data: { + await inngest.send( + inngestEvents.manifestCompleted.create({ serviceId: payload.serviceId, buildGroupId: payload.buildGroupId || "", imageUri: payload.finalImageUri, - }, - }); + }), + ); } } 
else if (data.status === "failed" && payload.serviceId) { - await inngest.send({ - name: "manifest/failed", - data: { + await inngest.send( + inngestEvents.manifestFailed.create({ serviceId: payload.serviceId, buildGroupId: payload.buildGroupId || "", error: data.error || "Manifest creation failed", - }, - }); + }), + ); } } catch (error) { console.error(`[work-queue] failed to parse payload:`, error); diff --git a/web/app/api/webhooks/github/route.ts b/web/app/api/webhooks/github/route.ts index 606f8db..cbaf324 100644 --- a/web/app/api/webhooks/github/route.ts +++ b/web/app/api/webhooks/github/route.ts @@ -13,6 +13,7 @@ import { updateGitHubDeploymentStatus, } from "@/lib/github"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; type InstallationPayload = { action: "created" | "deleted" | "suspend" | "unsuspend"; @@ -191,9 +192,8 @@ async function handlePushEvent(payload: PushPayload) { console.error("[webhook:push] failed to create GitHub deployment:", error); } - await inngest.send({ - name: "build/trigger", - data: { + await inngest.send( + inngestEvents.buildTrigger.create({ serviceId: githubRepo.serviceId, trigger: "push", githubRepoId: githubRepo.id, @@ -202,8 +202,8 @@ async function handlePushEvent(payload: PushPayload) { branch, author: head_commit.author.username || head_commit.author.name, githubDeploymentId, - }, - }); + }), + ); return NextResponse.json({ ok: true }); } diff --git a/web/lib/agent-status.ts b/web/lib/agent-status.ts index 1e451f7..ae30751 100644 --- a/web/lib/agent-status.ts +++ b/web/lib/agent-status.ts @@ -12,6 +12,7 @@ import { services, } from "@/db/schema"; import { inngest } from "@/lib/inngest/client"; +import { inngestEvents } from "@/lib/inngest/events"; import { ingestRolloutLog } from "@/lib/victoria-logs"; type ContainerStatus = { @@ -194,14 +195,13 @@ export async function applyStatusReport( if (!hasHealthCheck) { if (deployment.rolloutId) { - await inngest.send({ - 
name: "deployment/healthy", - data: { + await inngest.send( + inngestEvents.deploymentHealthy.create({ deploymentId: deployment.id, rolloutId: deployment.rolloutId, serviceId: deployment.serviceId, - }, - }); + }), + ); } if (service?.migrationStatus === "deploying_target") { @@ -266,27 +266,25 @@ export async function applyStatusReport( .where(eq(deployments.id, deployment.id)); if (deployment.rolloutId) { - await inngest.send({ - name: "deployment/healthy", - data: { + await inngest.send( + inngestEvents.deploymentHealthy.create({ deploymentId: deployment.id, rolloutId: deployment.rolloutId, serviceId: deployment.serviceId, - }, - }); + }), + ); } if (service?.migrationStatus === "deploying_target") { console.log( `[migration] deployment ${deployment.id} healthy (no health check), sending event`, ); - await inngest.send({ - name: "migration/deployment-healthy", - data: { + await inngest.send( + inngestEvents.migrationDeploymentHealthy.create({ deploymentId: deployment.id, serviceId: deployment.serviceId, - }, - }); + }), + ); } continue; } @@ -329,14 +327,13 @@ export async function applyStatusReport( "health_check", `Deployment ${deployment.id} is healthy`, ); - await inngest.send({ - name: "deployment/healthy", - data: { + await inngest.send( + inngestEvents.deploymentHealthy.create({ deploymentId: deployment.id, rolloutId: deployment.rolloutId, serviceId: deployment.serviceId, - }, - }); + }), + ); } const deployedService = await db @@ -349,13 +346,12 @@ export async function applyStatusReport( console.log( `[migration] deployment ${deployment.id} healthy, sending event`, ); - await inngest.send({ - name: "migration/deployment-healthy", - data: { + await inngest.send( + inngestEvents.migrationDeploymentHealthy.create({ deploymentId: deployment.id, serviceId: deployment.serviceId, - }, - }); + }), + ); } } @@ -374,15 +370,14 @@ export async function applyStatusReport( "health_check", `Deployment ${deployment.id} failed health check`, ); - await inngest.send({ - 
name: "deployment/failed", - data: { + await inngest.send( + inngestEvents.deploymentFailed.create({ deploymentId: deployment.id, rolloutId: deployment.rolloutId, serviceId: deployment.serviceId, reason: "health_check_failed", - }, - }); + }), + ); } } } @@ -405,13 +400,12 @@ export async function applyStatusReport( "dns_sync", `DNS synced on server ${serverId}`, ); - await inngest.send({ - name: "server/dns-synced", - data: { + await inngest.send( + inngestEvents.serverDnsSynced.create({ serverId, rolloutId: rollout.id, - }, - }); + }), + ); } } } diff --git a/web/lib/inngest/client.ts b/web/lib/inngest/client.ts index c72ff47..595bb3a 100644 --- a/web/lib/inngest/client.ts +++ b/web/lib/inngest/client.ts @@ -1,7 +1,9 @@ -import { EventSchemas, Inngest } from "inngest"; -import type { Events } from "./events"; +import { Inngest } from "inngest"; export const inngest = new Inngest({ id: "techulus-cloud", - schemas: new EventSchemas().fromRecord(), + baseUrl: process.env.INNGEST_BASE_URL, + eventKey: process.env.INNGEST_EVENT_KEY, + signingKey: process.env.INNGEST_SIGNING_KEY, + checkpointing: false, }); diff --git a/web/lib/inngest/events/index.ts b/web/lib/inngest/events/index.ts index c15c495..01a3019 100644 --- a/web/lib/inngest/events/index.ts +++ b/web/lib/inngest/events/index.ts @@ -1,3 +1,5 @@ +import { eventType, staticSchema } from "inngest"; + export type { RolloutEvents } from "./rollout"; export type { MigrationEvents } from "./migration"; export type { BackupEvents } from "./backup"; @@ -15,3 +17,41 @@ export type Events = RolloutEvents & BackupEvents & RestoreEvents & BuildEvents; + +type EventName = keyof Events & string; +type EventData = Events[TName]["data"]; + +const defineEvent = (name: TName) => + eventType(name, { schema: staticSchema>() }); + +export const inngestEvents = { + rolloutCreated: defineEvent("rollout/created"), + rolloutCancelled: defineEvent("rollout/cancelled"), + deploymentHealthy: defineEvent("deployment/healthy"), + 
deploymentFailed: defineEvent("deployment/failed"), + serverDnsSynced: defineEvent("server/dns-synced"), + + migrationStarted: defineEvent("migration/started"), + migrationCancelled: defineEvent("migration/cancelled"), + migrationBackupCompleted: defineEvent("migration/backup-completed"), + migrationBackupFailed: defineEvent("migration/backup-failed"), + migrationRestoreCompleted: defineEvent("migration/restore-completed"), + migrationRestoreFailed: defineEvent("migration/restore-failed"), + migrationDeploymentHealthy: defineEvent("migration/deployment-healthy"), + + backupStarted: defineEvent("backup/started"), + backupCompleted: defineEvent("backup/completed"), + backupFailed: defineEvent("backup/failed"), + + restoreTrigger: defineEvent("restore/trigger"), + restoreStarted: defineEvent("restore/started"), + restoreCompleted: defineEvent("restore/completed"), + restoreFailed: defineEvent("restore/failed"), + + buildTrigger: defineEvent("build/trigger"), + buildStarted: defineEvent("build/started"), + buildCancelled: defineEvent("build/cancelled"), + buildCompleted: defineEvent("build/completed"), + manifestCompleted: defineEvent("manifest/completed"), + manifestFailed: defineEvent("manifest/failed"), +}; diff --git a/web/lib/inngest/functions/backup-workflow.ts b/web/lib/inngest/functions/backup-workflow.ts index 330a819..e8f3e33 100644 --- a/web/lib/inngest/functions/backup-workflow.ts +++ b/web/lib/inngest/functions/backup-workflow.ts @@ -2,32 +2,35 @@ import { eq } from "drizzle-orm"; import { db } from "@/db"; import { volumeBackups } from "@/db/schema"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; export const backupWorkflow = inngest.createFunction( { id: "backup-workflow", + triggers: [inngestEvents.backupStarted], }, - { event: "backup/started" }, - async ({ event, step }) => { + async ({ event, step, group }) => { const { backupId } = event.data; - const completedPromise = step - .waitForEvent("wait-backup-completed", 
{ - event: "backup/completed", - timeout: "30m", - if: `async.data.backupId == "${backupId}"`, - }) - .then((result) => ({ status: "completed" as const, result })); + const outcome = await group.parallel(() => { + const completedPromise = step + .waitForEvent("wait-backup-completed", { + event: inngestEvents.backupCompleted, + timeout: "30m", + if: `async.data.backupId == "${backupId}"`, + }) + .then((result) => ({ status: "completed" as const, result })); - const failedPromise = step - .waitForEvent("wait-backup-failed", { - event: "backup/failed", - timeout: "30m", - if: `async.data.backupId == "${backupId}"`, - }) - .then((result) => ({ status: "failed" as const, result })); + const failedPromise = step + .waitForEvent("wait-backup-failed", { + event: inngestEvents.backupFailed, + timeout: "30m", + if: `async.data.backupId == "${backupId}"`, + }) + .then((result) => ({ status: "failed" as const, result })); - const outcome = await Promise.race([completedPromise, failedPromise]); + return Promise.race([completedPromise, failedPromise]); + }); if (!outcome.result) { await step.run("handle-backup-timeout", async () => { @@ -58,8 +61,8 @@ export const backupWorkflow = inngest.createFunction( export const onBackupFailed = inngest.createFunction( { id: "on-backup-failed", + triggers: [inngestEvents.backupFailed], }, - { event: "backup/failed" }, async ({ event }) => { const { backupId, error } = event.data; return { status: "failed", backupId, error }; diff --git a/web/lib/inngest/functions/build-trigger-workflow.ts b/web/lib/inngest/functions/build-trigger-workflow.ts index 43fe37f..0e9fb2c 100644 --- a/web/lib/inngest/functions/build-trigger-workflow.ts +++ b/web/lib/inngest/functions/build-trigger-workflow.ts @@ -2,6 +2,7 @@ import { randomUUID } from "node:crypto"; import { db } from "@/db"; import { builds } from "@/db/schema"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; import { selectBuildServerForPlatform, 
getTargetPlatformsForService, @@ -11,9 +12,9 @@ import { enqueueWork } from "@/lib/work-queue"; export const buildTriggerWorkflow = inngest.createFunction( { id: "build-trigger-workflow", + triggers: [inngestEvents.buildTrigger], concurrency: [{ limit: 1, key: "event.data.serviceId" }], }, - { event: "build/trigger" }, async ({ event, step }) => { const { serviceId, @@ -62,14 +63,13 @@ export const buildTriggerWorkflow = inngest.createFunction( ); await step.run("send-build-started", async () => { - await inngest.send({ - name: "build/started", - data: { + await inngest.send( + inngestEvents.buildStarted.create({ buildId: buildIds[0], serviceId, buildGroupId, - }, - }); + }), + ); }); return { status: "triggered", buildIds, buildGroupId }; diff --git a/web/lib/inngest/functions/build-workflow.ts b/web/lib/inngest/functions/build-workflow.ts index 50d0001..0c056b4 100644 --- a/web/lib/inngest/functions/build-workflow.ts +++ b/web/lib/inngest/functions/build-workflow.ts @@ -2,21 +2,24 @@ import { eq, and } from "drizzle-orm"; import { db } from "@/db"; import { builds, services, projects, serviceReplicas } from "@/db/schema"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; import { deployService } from "@/actions/projects"; export const buildWorkflow = inngest.createFunction( { id: "build-workflow", + triggers: [inngestEvents.buildStarted], concurrency: [{ limit: 1, key: "event.data.serviceId" }], - cancelOn: [{ event: "build/cancelled", match: "data.buildGroupId" }], + cancelOn: [ + { event: inngestEvents.buildCancelled, match: "data.buildGroupId" }, + ], }, - { event: "build/started" }, async ({ event, step }) => { const { buildId, serviceId, buildGroupId } = event.data; if (!buildGroupId) { const result = await step.waitForEvent("wait-single-build", { - event: "build/completed", + event: inngestEvents.buildCompleted, timeout: "60m", if: `async.data.buildId == "${buildId}"`, }); @@ -40,7 +43,7 @@ export const buildWorkflow = 
inngest.createFunction( } const manifestResult = await step.waitForEvent("wait-manifest", { - event: "manifest/completed", + event: inngestEvents.manifestCompleted, timeout: "10m", if: `async.data.serviceId == "${serviceId}"`, }); @@ -85,7 +88,7 @@ export const buildWorkflow = inngest.createFunction( const buildResults = await Promise.all( groupBuilds.map((build) => step.waitForEvent(`wait-build-${build.id}`, { - event: "build/completed", + event: inngestEvents.buildCompleted, timeout: "60m", if: `async.data.buildId == "${build.id}"`, }), @@ -118,7 +121,7 @@ export const buildWorkflow = inngest.createFunction( } const manifestResult = await step.waitForEvent("wait-group-manifest", { - event: "manifest/completed", + event: inngestEvents.manifestCompleted, timeout: "10m", if: `async.data.buildGroupId == "${buildGroupId}"`, }); diff --git a/web/lib/inngest/functions/crons.ts b/web/lib/inngest/functions/crons.ts index d3352bb..570835d 100644 --- a/web/lib/inngest/functions/crons.ts +++ b/web/lib/inngest/functions/crons.ts @@ -8,11 +8,15 @@ import { checkAndRunScheduledDeployments, cleanupStaleItems, } from "@/lib/scheduler"; +import { cron } from "inngest"; import { inngest } from "../client"; export const staleServerCheck = inngest.createFunction( - { id: "cron-stale-server-check", singleton: { mode: "skip" } }, - { cron: "*/5 * * * *" }, + { + id: "cron-stale-server-check", + triggers: [cron("*/5 * * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await step.run("check-stale-servers", async () => { console.log("[cron] running stale server check"); @@ -22,8 +26,11 @@ export const staleServerCheck = inngest.createFunction( ); export const scheduledDeploymentsCheck = inngest.createFunction( - { id: "cron-scheduled-deployments", singleton: { mode: "skip" } }, - { cron: "*/15 * * * *" }, + { + id: "cron-scheduled-deployments", + triggers: [cron("*/15 * * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await 
step.run("check-scheduled-deployments", async () => { console.log("[cron] checking scheduled deployments"); @@ -33,8 +40,11 @@ export const scheduledDeploymentsCheck = inngest.createFunction( ); export const certificateRenewal = inngest.createFunction( - { id: "cron-certificate-renewal", singleton: { mode: "skip" } }, - { cron: "0 2 * * *" }, + { + id: "cron-certificate-renewal", + triggers: [cron("0 2 * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await step.run("renew-certificates", async () => { console.log("[cron] checking for expiring certificates"); @@ -44,8 +54,11 @@ export const certificateRenewal = inngest.createFunction( ); export const challengeCleanup = inngest.createFunction( - { id: "cron-challenge-cleanup", singleton: { mode: "skip" } }, - { cron: "0 * * * *" }, + { + id: "cron-challenge-cleanup", + triggers: [cron("0 * * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await step.run("cleanup-challenges", async () => { await cleanupExpiredChallenges(); @@ -54,8 +67,11 @@ export const challengeCleanup = inngest.createFunction( ); export const scheduledBackupsCheck = inngest.createFunction( - { id: "cron-scheduled-backups", singleton: { mode: "skip" } }, - { cron: "*/15 * * * *" }, + { + id: "cron-scheduled-backups", + triggers: [cron("*/15 * * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await step.run("check-scheduled-backups", async () => { console.log("[cron] checking scheduled backups"); @@ -65,8 +81,11 @@ export const scheduledBackupsCheck = inngest.createFunction( ); export const oldBackupsCleanup = inngest.createFunction( - { id: "cron-old-backups-cleanup", singleton: { mode: "skip" } }, - { cron: "0 3 * * *" }, + { + id: "cron-old-backups-cleanup", + triggers: [cron("0 3 * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await step.run("cleanup-old-backups", async () => { console.log("[cron] cleaning up old backups"); @@ -76,8 +95,11 @@ export const oldBackupsCleanup 
= inngest.createFunction( ); export const staleItemsCleanup = inngest.createFunction( - { id: "cron-stale-items-cleanup", singleton: { mode: "skip" } }, - { cron: "*/15 * * * *" }, + { + id: "cron-stale-items-cleanup", + triggers: [cron("*/15 * * * *")], + singleton: { mode: "skip" }, + }, async ({ step }) => { await step.run("cleanup-stale-items", async () => { console.log("[cron] cleaning up stale items"); diff --git a/web/lib/inngest/functions/migration-workflow.ts b/web/lib/inngest/functions/migration-workflow.ts index 8b4efbe..41d39d6 100644 --- a/web/lib/inngest/functions/migration-workflow.ts +++ b/web/lib/inngest/functions/migration-workflow.ts @@ -12,15 +12,18 @@ import { getBackupStorageConfig } from "@/db/queries"; import { detectDatabaseType } from "@/lib/database-utils"; import { enqueueWork } from "@/lib/work-queue"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; import { deployService } from "@/actions/projects"; export const migrationWorkflow = inngest.createFunction( { id: "migration-workflow", - cancelOn: [{ event: "migration/cancelled", match: "data.serviceId" }], + triggers: [inngestEvents.migrationStarted], + cancelOn: [ + { event: inngestEvents.migrationCancelled, match: "data.serviceId" }, + ], }, - { event: "migration/started" }, - async ({ event, step }) => { + async ({ event, step, group }) => { const { serviceId, targetServerId, @@ -123,25 +126,27 @@ export const migrationWorkflow = inngest.createFunction( }); const backupResults = await Promise.all( - backupIds.map((backupId) => { - const completedPromise = step - .waitForEvent(`wait-backup-${backupId}`, { - event: "migration/backup-completed", - timeout: "30m", - if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, - }) - .then((result) => ({ status: "completed" as const, result })); + backupIds.map((backupId) => + group.parallel(() => { + const completedPromise = step + .waitForEvent(`wait-backup-${backupId}`, { + 
event: inngestEvents.migrationBackupCompleted, + timeout: "30m", + if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, + }) + .then((result) => ({ status: "completed" as const, result })); - const failedPromise = step - .waitForEvent(`wait-backup-failed-${backupId}`, { - event: "migration/backup-failed", - timeout: "30m", - if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, - }) - .then((result) => ({ status: "failed" as const, result })); + const failedPromise = step + .waitForEvent(`wait-backup-failed-${backupId}`, { + event: inngestEvents.migrationBackupFailed, + timeout: "30m", + if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, + }) + .then((result) => ({ status: "failed" as const, result })); - return Promise.race([completedPromise, failedPromise]); - }), + return Promise.race([completedPromise, failedPromise]); + }), + ), ); const backupTimedOut = backupResults.some((r) => r.result === null); @@ -234,25 +239,27 @@ export const migrationWorkflow = inngest.createFunction( }); const restoreResults = await Promise.all( - backupIds.map((backupId) => { - const completedPromise = step - .waitForEvent(`wait-restore-${backupId}`, { - event: "migration/restore-completed", - timeout: "30m", - if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, - }) - .then((result) => ({ status: "completed" as const, result })); - - const failedPromise = step - .waitForEvent(`wait-restore-failed-${backupId}`, { - event: "migration/restore-failed", - timeout: "30m", - if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, - }) - .then((result) => ({ status: "failed" as const, result })); - - return Promise.race([completedPromise, failedPromise]); - }), + backupIds.map((backupId) => + group.parallel(() => { + const completedPromise = step + .waitForEvent(`wait-restore-${backupId}`, { + event: 
inngestEvents.migrationRestoreCompleted, + timeout: "30m", + if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, + }) + .then((result) => ({ status: "completed" as const, result })); + + const failedPromise = step + .waitForEvent(`wait-restore-failed-${backupId}`, { + event: inngestEvents.migrationRestoreFailed, + timeout: "30m", + if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, + }) + .then((result) => ({ status: "failed" as const, result })); + + return Promise.race([completedPromise, failedPromise]); + }), + ), ); const restoreTimedOut = restoreResults.some((r) => r.result === null); @@ -314,7 +321,7 @@ export const migrationWorkflow = inngest.createFunction( const deploymentHealthy = await step.waitForEvent( "wait-deployment-healthy", { - event: "deployment/healthy", + event: inngestEvents.deploymentHealthy, timeout: "10m", if: `async.data.serviceId == "${serviceId}"`, }, @@ -402,25 +409,27 @@ export const migrationWorkflow = inngest.createFunction( }); const restoreResults = await Promise.all( - backupIds.map((backupId) => { - const completedPromise = step - .waitForEvent(`wait-restore-${backupId}`, { - event: "migration/restore-completed", - timeout: "30m", - if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, - }) - .then((result) => ({ status: "completed" as const, result })); - - const failedPromise = step - .waitForEvent(`wait-restore-failed-${backupId}`, { - event: "migration/restore-failed", - timeout: "30m", - if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, - }) - .then((result) => ({ status: "failed" as const, result })); - - return Promise.race([completedPromise, failedPromise]); - }), + backupIds.map((backupId) => + group.parallel(() => { + const completedPromise = step + .waitForEvent(`wait-restore-${backupId}`, { + event: inngestEvents.migrationRestoreCompleted, + timeout: "30m", + if: `async.data.backupId 
== "${backupId}" && async.data.serviceId == "${serviceId}"`, + }) + .then((result) => ({ status: "completed" as const, result })); + + const failedPromise = step + .waitForEvent(`wait-restore-failed-${backupId}`, { + event: inngestEvents.migrationRestoreFailed, + timeout: "30m", + if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`, + }) + .then((result) => ({ status: "failed" as const, result })); + + return Promise.race([completedPromise, failedPromise]); + }), + ), ); const restoreTimedOut = restoreResults.some((r) => r.result === null); diff --git a/web/lib/inngest/functions/on-deployment-failed.ts b/web/lib/inngest/functions/on-deployment-failed.ts index f7c18bd..fbe2cf1 100644 --- a/web/lib/inngest/functions/on-deployment-failed.ts +++ b/web/lib/inngest/functions/on-deployment-failed.ts @@ -2,12 +2,12 @@ import { eq } from "drizzle-orm"; import { db } from "@/db"; import { rollouts } from "@/db/schema"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; import { handleRolloutFailure } from "./rollout-utils"; import { ingestRolloutLog } from "@/lib/victoria-logs"; export const onDeploymentFailed = inngest.createFunction( - { id: "on-deployment-failed" }, - { event: "deployment/failed" }, + { id: "on-deployment-failed", triggers: [inngestEvents.deploymentFailed] }, async ({ event, step }) => { const { rolloutId, serviceId, reason } = event.data; @@ -28,10 +28,10 @@ export const onDeploymentFailed = inngest.createFunction( `Rollout failed: ${reason}`, ); - await step.sendEvent("cancel-rollout", { - name: "rollout/cancelled", - data: { rolloutId }, - }); + await step.sendEvent( + "cancel-rollout", + inngestEvents.rolloutCancelled.create({ rolloutId }), + ); await step.run("handle-failure", async () => { await handleRolloutFailure(rolloutId, serviceId, reason, true); diff --git a/web/lib/inngest/functions/restore-trigger-workflow.ts b/web/lib/inngest/functions/restore-trigger-workflow.ts index 
9e7b8f3..ec1d617 100644 --- a/web/lib/inngest/functions/restore-trigger-workflow.ts +++ b/web/lib/inngest/functions/restore-trigger-workflow.ts @@ -4,6 +4,7 @@ import { volumeBackups, services, deployments } from "@/db/schema"; import { getBackupStorageConfig } from "@/db/queries"; import { enqueueWork } from "@/lib/work-queue"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; function detectBackupTypeFromPath(storagePath: string): "volume" | "database" { if (storagePath.endsWith(".tar.gz")) return "volume"; @@ -17,8 +18,8 @@ function detectBackupTypeFromPath(storagePath: string): "volume" | "database" { export const restoreTriggerWorkflow = inngest.createFunction( { id: "restore-trigger-workflow", + triggers: [inngestEvents.restoreTrigger], }, - { event: "restore/trigger" }, async ({ event, step }) => { const { serviceId, backupId, targetServerId } = event.data; @@ -101,14 +102,13 @@ export const restoreTriggerWorkflow = inngest.createFunction( }); await step.run("send-restore-started", async () => { - await inngest.send({ - name: "restore/started", - data: { + await inngest.send( + inngestEvents.restoreStarted.create({ backupId, serviceId, serverId, - }, - }); + }), + ); }); return { status: "triggered", backupId }; diff --git a/web/lib/inngest/functions/restore-workflow.ts b/web/lib/inngest/functions/restore-workflow.ts index ac2ff4d..9712835 100644 --- a/web/lib/inngest/functions/restore-workflow.ts +++ b/web/lib/inngest/functions/restore-workflow.ts @@ -1,30 +1,33 @@ import { inngest } from "../client"; +import { inngestEvents } from "../events"; export const restoreWorkflow = inngest.createFunction( { id: "restore-workflow", + triggers: [inngestEvents.restoreStarted], }, - { event: "restore/started" }, - async ({ event, step }) => { + async ({ event, step, group }) => { const { backupId } = event.data; - const completedPromise = step - .waitForEvent("wait-restore-completed", { - event: "restore/completed", - timeout: "30m", - 
if: `async.data.backupId == "${backupId}"`, - }) - .then((result) => ({ status: "completed" as const, result })); - - const failedPromise = step - .waitForEvent("wait-restore-failed", { - event: "restore/failed", - timeout: "30m", - if: `async.data.backupId == "${backupId}"`, - }) - .then((result) => ({ status: "failed" as const, result })); - - const outcome = await Promise.race([completedPromise, failedPromise]); + const outcome = await group.parallel(() => { + const completedPromise = step + .waitForEvent("wait-restore-completed", { + event: inngestEvents.restoreCompleted, + timeout: "30m", + if: `async.data.backupId == "${backupId}"`, + }) + .then((result) => ({ status: "completed" as const, result })); + + const failedPromise = step + .waitForEvent("wait-restore-failed", { + event: inngestEvents.restoreFailed, + timeout: "30m", + if: `async.data.backupId == "${backupId}"`, + }) + .then((result) => ({ status: "failed" as const, result })); + + return Promise.race([completedPromise, failedPromise]); + }); if (!outcome.result) { return { status: "failed", reason: "timeout", backupId }; @@ -45,8 +48,8 @@ export const restoreWorkflow = inngest.createFunction( export const onRestoreFailed = inngest.createFunction( { id: "on-restore-failed", + triggers: [inngestEvents.restoreFailed], }, - { event: "restore/failed" }, async ({ event, step }) => { const { backupId, error } = event.data; return { status: "failed", backupId, error }; diff --git a/web/lib/inngest/functions/rollout-workflow.ts b/web/lib/inngest/functions/rollout-workflow.ts index 7a31a93..95f9a1b 100644 --- a/web/lib/inngest/functions/rollout-workflow.ts +++ b/web/lib/inngest/functions/rollout-workflow.ts @@ -4,6 +4,7 @@ import { getService } from "@/db/queries"; import { deployments, rollouts } from "@/db/schema"; import { ingestRolloutLog } from "@/lib/victoria-logs"; import { inngest } from "../client"; +import { inngestEvents } from "../events"; import { calculateServicePlacements, 
checkForRollingUpdate, @@ -20,10 +21,12 @@ import { handleRolloutFailure } from "./rollout-utils"; export const rolloutWorkflow = inngest.createFunction( { id: "rollout-workflow", + triggers: [inngestEvents.rolloutCreated], concurrency: [{ limit: 1, key: "event.data.serviceId" }], - cancelOn: [{ event: "rollout/cancelled", match: "data.rolloutId" }], + cancelOn: [ + { event: inngestEvents.rolloutCancelled, match: "data.rolloutId" }, + ], }, - { event: "rollout/created" }, async ({ event, step }) => { const { rolloutId, serviceId } = event.data; @@ -235,7 +238,7 @@ export const rolloutWorkflow = inngest.createFunction( const healthResults = await Promise.all( pendingHealthDeploymentIds.map((deploymentId) => step.waitForEvent(`wait-healthy-${deploymentId}`, { - event: "deployment/healthy", + event: inngestEvents.deploymentHealthy, timeout: "10m", if: `async.data.deploymentId == "${deploymentId}"`, }), @@ -309,7 +312,7 @@ export const rolloutWorkflow = inngest.createFunction( const dnsResults = await Promise.all( serverIds.map((serverId) => step.waitForEvent(`wait-dns-${serverId}`, { - event: "server/dns-synced", + event: inngestEvents.serverDnsSynced, timeout: "5m", if: `async.data.serverId == "${serverId}" && async.data.rolloutId == "${rolloutId}"`, }), diff --git a/web/package.json b/web/package.json index a0c83bc..d5d7a8b 100644 --- a/web/package.json +++ b/web/package.json @@ -3,7 +3,7 @@ "version": "0.1.0", "private": true, "scripts": { - "dev": "portless cloud next dev", + "dev": "portless cloud --app-port 3000 next dev", "build": "next build", "start": "next start", "lint": "next lint", @@ -26,7 +26,7 @@ "cron-parser": "^5.4.0", "cronstrue": "^3.9.0", "drizzle-orm": "^0.45.1", - "inngest": "^3.54.2", + "inngest": "^4.3.0", "ip-address": "^10.1.0", "jose": "^6.1.3", "lucide-react": "^0.562.0", diff --git a/web/pnpm-lock.yaml b/web/pnpm-lock.yaml index 5fb9963..dd7b6bc 100644 --- a/web/pnpm-lock.yaml +++ b/web/pnpm-lock.yaml @@ -52,8 +52,8 @@ importers: 
specifier: ^0.45.1 version: 0.45.1(@opentelemetry/api@1.9.1)(@types/pg@8.16.0)(kysely@0.28.11)(pg@8.18.0) inngest: - specifier: ^3.54.2 - version: 3.54.2(@opentelemetry/core@2.7.1(@opentelemetry/api@1.9.1))(express@5.2.1)(hono@4.11.8)(next@16.2.1(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(typescript@5.9.3)(zod@4.3.6) + specifier: ^4.3.0 + version: 4.3.0(@opentelemetry/core@2.7.1(@opentelemetry/api@1.9.1))(express@5.2.1)(hono@4.11.8)(next@16.2.1(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react@19.2.4)(typescript@5.9.3)(zod@4.3.6) ip-address: specifier: ^10.1.0 version: 10.1.0 @@ -2982,10 +2982,6 @@ packages: ajv@8.17.1: resolution: {integrity: sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==} - ansi-regex@4.1.1: - resolution: {integrity: sha512-ILlv4k/3f6vfQ4OoP2AGvirOktlQ98ZEL1k9FaQjxa3L1abBgbuTDAdPOpvbGncC0BTVQrl+OM8xZGK6tWXt7g==} - engines: {node: '>=6'} - ansi-regex@5.0.1: resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} engines: {node: '>=8'} @@ -4126,8 +4122,8 @@ packages: inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} - inngest@3.54.2: - resolution: {integrity: sha512-SAAc52c7n34E9dBEapy99g0vO5MPJP2yttoZpCu3LcYy1d8tz6CrkdoEQ4FBBgzMNnQxNpeAH5lvNeFCGNXbtA==} + inngest@4.3.0: + resolution: {integrity: sha512-UYx2fFbtcEDq092yL088iEeB0j57+6fjvTbrkH3JX+c87j2YsSNVxUddr20EHzd5psXQX5CC5+39hA16OcxdzQ==} engines: {node: '>=20'} peerDependencies: '@sveltejs/kit': '>=1.27.3' @@ -4139,6 +4135,7 @@ packages: hono: '>=4.2.7' koa: '>=2.14.2' next: '>=12.0.0' + react: '>=18.0.0' typescript: '>=5.8.0' zod: ^3.25.0 || ^4.0.0 peerDependenciesMeta: @@ -4160,6 +4157,8 @@ packages: optional: true next: optional: true + react: + optional: true typescript: optional: true @@ 
-5339,10 +5338,6 @@ packages: resolution: {integrity: sha512-zaJYxz2FtcMb4f+g60KsRNFOpVMUyuJgA51Zi5Z1DOTC3S59+OQiVOzE9GZt0x72uBGWKsQIuBKeF9iusmKFsg==} engines: {node: '>=14.16'} - strip-ansi@5.2.0: - resolution: {integrity: sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==} - engines: {node: '>=6'} - strip-ansi@6.0.1: resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} engines: {node: '>=8'} @@ -8922,8 +8917,6 @@ snapshots: json-schema-traverse: 1.0.0 require-from-string: 2.0.2 - ansi-regex@4.1.1: {} - ansi-regex@5.0.1: {} ansi-regex@6.2.2: {} @@ -10201,7 +10194,7 @@ snapshots: inherits@2.0.4: {} - inngest@3.54.2(@opentelemetry/core@2.7.1(@opentelemetry/api@1.9.1))(express@5.2.1)(hono@4.11.8)(next@16.2.1(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(typescript@5.9.3)(zod@4.3.6): + inngest@4.3.0(@opentelemetry/core@2.7.1(@opentelemetry/api@1.9.1))(express@5.2.1)(hono@4.11.8)(next@16.2.1(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react@19.2.4)(typescript@5.9.3)(zod@4.3.6): dependencies: '@bufbuild/protobuf': 2.12.0 '@inngest/ai': 0.1.7 @@ -10218,14 +10211,12 @@ snapshots: '@types/debug': 4.1.13 '@types/ms': 2.1.0 canonicalize: 1.0.8 - chalk: 4.1.2 cross-fetch: 4.1.0 debug: 4.4.3 hash.js: 1.1.7 json-stringify-safe: 5.0.1 ms: 2.1.3 serialize-error-cjs: 0.1.4 - strip-ansi: 5.2.0 temporal-polyfill: 0.2.5 ulid: 2.4.0 zod: 4.3.6 @@ -10233,6 +10224,7 @@ snapshots: express: 5.2.1 hono: 4.11.8 next: 16.2.1(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + react: 19.2.4 typescript: 5.9.3 transitivePeerDependencies: - '@opentelemetry/core' @@ -11461,10 +11453,6 @@ snapshots: is-obj: 3.0.0 is-regexp: 3.1.0 - strip-ansi@5.2.0: - dependencies: - ansi-regex: 4.1.1 - strip-ansi@6.0.1: dependencies: ansi-regex: 5.0.1 From 
097fee14aa25ee88aa42381be884aa7c2582d9e7 Mon Sep 17 00:00:00 2001 From: Arjun Komath Date: Sun, 10 May 2026 10:05:19 +1000 Subject: [PATCH 2/3] Fix duplicate key --- web/components/service/details/pending-changes-banner.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/components/service/details/pending-changes-banner.tsx b/web/components/service/details/pending-changes-banner.tsx index 72504a0..08c908a 100644 --- a/web/components/service/details/pending-changes-banner.tsx +++ b/web/components/service/details/pending-changes-banner.tsx @@ -86,9 +86,9 @@ export const PendingChangesBanner = memo(function PendingChangesBanner({

{hasChanges ? (
- {changes.map((change) => ( + {changes.map((change, index) => (
From f0636ad36865dd75758a741e99cacca4085acc6b Mon Sep 17 00:00:00 2001 From: Arjun Komath Date: Sun, 10 May 2026 15:51:38 +1000 Subject: [PATCH 3/3] Reliability fixes Deployment Added --remove-orphans to Compose install/update/stop commands to prevent stale containers from lingering. Added fresh-host Docker log rotation config to reduce disk-fill risk. Put Traefik behind a Docker socket proxy, reducing direct Docker socket exposure. Added healthchecks to production Compose services for better operational visibility. Updated docs/examples to recommend versioned or digest-pinned images over mutable tags. Agent Preserved image digests during drift detection so digest-pinned deploys do not get collapsed to tag comparisons. Added bounded autohealing for running unhealthy containers using the existing restart work queue. Added runtime log caps for agent-launched containers. Changed image pruning to keep recent unused images for rollback safety. Added UI/import warnings for Docker socket mounts, latest, and implicit latest. 
--- agent/internal/agent/drift.go | 13 ++-- agent/internal/container/runtime_darwin.go | 15 +++-- agent/internal/container/runtime_linux.go | 4 +- cli/src/main.ts | 2 +- deployment/README.md | 12 +++- deployment/compose.postgres.yml | 62 +++++++++++++++++- deployment/compose.production.yml | 56 +++++++++++++++- deployment/install.sh | 40 +++++++++++- docs/installation.mdx | 23 +++++-- web/SELF-HOSTING.md | 10 ++- .../import-compose/import-compose-form.tsx | 34 +++++----- .../service/create-service-dialog.tsx | 21 ++++-- .../service/details/source-section.tsx | 13 +++- web/db/schema.ts | 6 ++ web/lib/agent-status.ts | 65 +++++++++++++++++++ web/lib/compose-parser.ts | 49 ++++++++++++-- web/lib/docker-image.ts | 18 +++++ 17 files changed, 382 insertions(+), 61 deletions(-) create mode 100644 web/lib/docker-image.ts diff --git a/agent/internal/agent/drift.go b/agent/internal/agent/drift.go index 021048c..80c7b0b 100644 --- a/agent/internal/agent/drift.go +++ b/agent/internal/agent/drift.go @@ -266,16 +266,21 @@ func (a *Agent) detectChanges(expected *agenthttp.ExpectedState, actual *ActualS } func normalizeImage(image string) string { - parts := strings.Split(image, "@") - image = parts[0] + digest := "" + if digestIndex := strings.Index(image, "@"); digestIndex != -1 { + digest = image[digestIndex:] + image = image[:digestIndex] + } image = strings.TrimPrefix(image, "docker.io/library/") image = strings.TrimPrefix(image, "docker.io/") - if !strings.Contains(image, ":") { + lastSlash := strings.LastIndex(image, "/") + lastColon := strings.LastIndex(image, ":") + if digest == "" && lastColon <= lastSlash { image = image + ":latest" } - return image + return image + digest } func (a *Agent) reconcileOne(actual *ActualState) error { diff --git a/agent/internal/container/runtime_darwin.go b/agent/internal/container/runtime_darwin.go index 59d13bc..ec068b3 100644 --- a/agent/internal/container/runtime_darwin.go +++ b/agent/internal/container/runtime_darwin.go @@ -108,6 
+108,9 @@ func Deploy(config *DeployConfig) (*DeployResult, error) { "--cap-add", "SETGID", "--cap-add", "NET_BIND_SERVICE", "--cap-add", "NET_RAW", + "--log-driver", "local", + "--log-opt", "max-size=10m", + "--log-opt", "max-file=3", } args = append(args, @@ -494,15 +497,15 @@ func writeDockerConfig(registryURL, username, password string) error { } func ImagePrune() { - exec.Command("docker", "image", "prune", "-a", "-f").Run() + exec.Command("docker", "image", "prune", "-a", "-f", "--filter", "until=168h").Run() } type dockerContainer struct { - ID string `json:"ID"` - Names string `json:"Names"` - Image string `json:"Image"` - State string `json:"State"` - Labels string `json:"Labels"` + ID string `json:"ID"` + Names string `json:"Names"` + Image string `json:"Image"` + State string `json:"State"` + Labels string `json:"Labels"` } func List() ([]Container, error) { diff --git a/agent/internal/container/runtime_linux.go b/agent/internal/container/runtime_linux.go index 3175254..630f5fb 100644 --- a/agent/internal/container/runtime_linux.go +++ b/agent/internal/container/runtime_linux.go @@ -106,6 +106,8 @@ func Deploy(config *DeployConfig) (*DeployResult, error) { "--cap-add", "SETGID", "--cap-add", "NET_BIND_SERVICE", "--cap-add", "NET_RAW", + "--log-opt", "max-size=10m", + "--log-opt", "max-file=3", } args = append(args, @@ -494,7 +496,7 @@ func writeDockerConfig(registryURL, username, password string) error { } func ImagePrune() { - exec.Command("podman", "image", "prune", "-a", "-f").Run() + exec.Command("podman", "image", "prune", "-a", "-f", "--filter", "until=168h").Run() } type podmanContainer struct { diff --git a/cli/src/main.ts b/cli/src/main.ts index db36e98..fdd61ed 100644 --- a/cli/src/main.ts +++ b/cli/src/main.ts @@ -357,7 +357,7 @@ service: name: ${folderName} source: type: image - image: nginx:latest + image: nginx:1.27 replicas: count: 1 ports: diff --git a/deployment/README.md b/deployment/README.md index 39154eb..3c15252 100644 --- 
a/deployment/README.md +++ b/deployment/README.md @@ -8,9 +8,17 @@ Docker Compose setup with Traefik for SSL termination via Let's Encrypt. cp .env.example .env # Edit .env with your values -docker compose -f compose.production.yml up -d --pull always +docker compose -f compose.production.yml up -d --pull always --remove-orphans ``` +For production hosts, cap Docker logs in `/etc/docker/daemon.json` or use the +installer, which writes bounded `json-file` log settings on fresh Docker hosts. +Prefer versioned or digest-pinned image references over mutable tags when you +operate a long-lived deployment. + +Health checks in these Compose files are for visibility. Plain Compose reports +unhealthy containers but does not restart them automatically. + ## Services | Service | Endpoint | @@ -56,5 +64,5 @@ Schema is synced automatically on container startup via `drizzle-kit push`. This ```bash docker compose -f compose.production.yml ps docker compose -f compose.production.yml logs -f -docker compose -f compose.production.yml down +docker compose -f compose.production.yml down --remove-orphans ``` diff --git a/deployment/compose.postgres.yml b/deployment/compose.postgres.yml index e4d02bb..bd5d298 100644 --- a/deployment/compose.postgres.yml +++ b/deployment/compose.postgres.yml @@ -1,10 +1,28 @@ services: + docker-socket-proxy: + image: tecnativa/docker-socket-proxy:0.3.0 + environment: + CONTAINERS: 1 + EVENTS: 1 + INFO: 1 + NETWORKS: 1 + VERSION: 1 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:2375/version || exit 1"] + interval: 10s + timeout: 5s + retries: 6 + restart: unless-stopped + traefik: image: traefik:v3.6 env_file: - ./.env command: - "--providers.docker=true" + - "--providers.docker.endpoint=tcp://docker-socket-proxy:2375" - "--providers.docker.exposedbydefault=false" - "--entrypoints.web.address=:80" - "--entrypoints.websecure.address=:443" @@ -14,12 +32,20 @@ services: - 
"--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web" - "--certificatesresolvers.letsencrypt.acme.email=${ACME_EMAIL}" - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json" + - "--ping=true" ports: - "80:80" - "443:443" volumes: - letsencrypt:/letsencrypt - - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + docker-socket-proxy: + condition: service_healthy + healthcheck: + test: ["CMD", "traefik", "healthcheck", "--ping"] + interval: 30s + timeout: 5s + retries: 3 restart: unless-stopped postgres: @@ -32,6 +58,12 @@ services: - POSTGRES_DB=${POSTGRES_DB} volumes: - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U \"$${POSTGRES_USER}\" -d \"$${POSTGRES_DB}\""] + interval: 30s + timeout: 5s + retries: 5 + start_period: 30s restart: unless-stopped web: @@ -61,6 +93,18 @@ services: - "traefik.http.routers.web.entrypoints=websecure" - "traefik.http.routers.web.tls.certresolver=letsencrypt" - "traefik.http.services.web.loadbalancer.server.port=3000" + healthcheck: + test: + [ + "CMD", + "node", + "-e", + "fetch('http://127.0.0.1:3000/api/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))", + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s restart: unless-stopped registry: @@ -75,6 +119,11 @@ services: - "traefik.http.routers.registry.entrypoints=websecure" - "traefik.http.routers.registry.tls.certresolver=letsencrypt" - "traefik.http.services.registry.loadbalancer.server.port=5000" + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:5000/v2/ || exit 1"] + interval: 30s + timeout: 10s + retries: 3 restart: unless-stopped victoria-logs: @@ -94,6 +143,11 @@ services: - "traefik.http.routers.logs.entrypoints=websecure" - "traefik.http.routers.logs.tls.certresolver=letsencrypt" - "traefik.http.services.logs.loadbalancer.server.port=9428" + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:9428/health || wget 
-q --spider http://127.0.0.1:9428/-/healthy"] + interval: 30s + timeout: 10s + retries: 3 restart: unless-stopped inngest: @@ -110,6 +164,12 @@ services: - "start" - "--sdk-url" - "http://web:3000/api/inngest" + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:8288/ || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s restart: unless-stopped volumes: diff --git a/deployment/compose.production.yml b/deployment/compose.production.yml index 21c8be0..bbddda8 100644 --- a/deployment/compose.production.yml +++ b/deployment/compose.production.yml @@ -1,10 +1,28 @@ services: + docker-socket-proxy: + image: tecnativa/docker-socket-proxy:0.3.0 + environment: + CONTAINERS: 1 + EVENTS: 1 + INFO: 1 + NETWORKS: 1 + VERSION: 1 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:2375/version || exit 1"] + interval: 10s + timeout: 5s + retries: 6 + restart: unless-stopped + traefik: image: traefik:v3.6 env_file: - ./.env command: - "--providers.docker=true" + - "--providers.docker.endpoint=tcp://docker-socket-proxy:2375" - "--providers.docker.exposedbydefault=false" - "--entrypoints.web.address=:80" - "--entrypoints.websecure.address=:443" @@ -15,12 +33,20 @@ services: - "--certificatesresolvers.letsencrypt.acme.email=${ACME_EMAIL}" - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json" - "--entrypoints.websecure.transport.respondingTimeouts.readTimeout=600" + - "--ping=true" ports: - "80:80" - "443:443" volumes: - letsencrypt:/letsencrypt - - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + docker-socket-proxy: + condition: service_healthy + healthcheck: + test: ["CMD", "traefik", "healthcheck", "--ping"] + interval: 30s + timeout: 5s + retries: 3 restart: unless-stopped web: @@ -49,6 +75,18 @@ services: - "traefik.http.routers.web.entrypoints=websecure" - "traefik.http.routers.web.tls.certresolver=letsencrypt" - 
"traefik.http.services.web.loadbalancer.server.port=3000" + healthcheck: + test: + [ + "CMD", + "node", + "-e", + "fetch('http://127.0.0.1:3000/api/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))", + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s restart: unless-stopped registry: @@ -63,6 +101,11 @@ services: - "traefik.http.routers.registry.entrypoints=websecure" - "traefik.http.routers.registry.tls.certresolver=letsencrypt" - "traefik.http.services.registry.loadbalancer.server.port=5000" + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:5000/v2/ || exit 1"] + interval: 30s + timeout: 10s + retries: 3 restart: unless-stopped victoria-logs: @@ -82,6 +125,11 @@ services: - "traefik.http.routers.logs.entrypoints=websecure" - "traefik.http.routers.logs.tls.certresolver=letsencrypt" - "traefik.http.services.logs.loadbalancer.server.port=9428" + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:9428/health || wget -q --spider http://127.0.0.1:9428/-/healthy"] + interval: 30s + timeout: 10s + retries: 3 restart: unless-stopped inngest: @@ -98,6 +146,12 @@ services: - "start" - "--sdk-url" - "http://web:3000/api/inngest" + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:8288/ || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s restart: unless-stopped volumes: diff --git a/deployment/install.sh b/deployment/install.sh index dc8b993..5016aff 100755 --- a/deployment/install.sh +++ b/deployment/install.sh @@ -13,6 +13,8 @@ BOLD='\033[1m' NC='\033[0m' ENV_FILE="" +DOCKER_LOGGING_CONFIGURED=false +DOCKER_ALREADY_INSTALLED=false while [[ $# -gt 0 ]]; do case $1 in @@ -174,6 +176,7 @@ install_docker() { log_header "Docker Installation" if command -v docker &>/dev/null; then + DOCKER_ALREADY_INSTALLED=true log_success "Docker is already installed: $(docker --version)" else log_info "Docker not found, installing..." 
@@ -186,8 +189,15 @@ install_docker() { log_success "Docker installed successfully" fi + configure_docker_logging + systemctl enable docker >/dev/null 2>&1 - systemctl start docker + if [[ "$DOCKER_LOGGING_CONFIGURED" == "true" && "$DOCKER_ALREADY_INSTALLED" == "true" ]]; then + log_info "Restarting Docker to apply log rotation..." + systemctl restart docker + else + systemctl start docker + fi if docker compose version &>/dev/null; then log_success "Docker Compose plugin: $(docker compose version --short)" @@ -197,6 +207,30 @@ install_docker() { fi } +configure_docker_logging() { + local daemon_config="/etc/docker/daemon.json" + + if [[ -f "$daemon_config" ]]; then + log_warn "Docker daemon config already exists at ${daemon_config}; leaving it unchanged." + log_warn "Recommended log rotation: log-driver=json-file with max-size=10m and max-file=3." + return + fi + + log_info "Configuring Docker json-file log rotation..." + install -m 0755 -d /etc/docker + cat > "$daemon_config" <<'EOF' +{ + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } +} +EOF + DOCKER_LOGGING_CONFIGURED=true + log_success "Docker log rotation configured" +} + download_compose_files() { log_header "Downloading Compose Files" @@ -367,7 +401,7 @@ build_and_start() { cd "$DEPLOY_DIR" log_info "Pulling and starting services using ${COMPOSE_FILE}..." - docker compose -f "$COMPOSE_FILE" up -d --pull always + docker compose -f "$COMPOSE_FILE" up -d --pull always --remove-orphans echo "" log_header "Deployment Complete" @@ -388,7 +422,7 @@ build_and_start() { echo "" echo -e "${YELLOW}${BOLD}IMPORTANT:${NC} Signup is enabled. After creating your account, disable it:${NC}" echo -e " 1. Edit ${DEPLOY_DIR}/.env and set ${BOLD}ALLOW_SIGNUP=false${NC}" - echo -e " 2. Run: ${BOLD}cd ${DEPLOY_DIR} && docker compose -f ${COMPOSE_FILE} up -d${NC}" + echo -e " 2. 
Run: ${BOLD}cd ${DEPLOY_DIR} && docker compose -f ${COMPOSE_FILE} up -d --remove-orphans${NC}" echo "" docker compose -f "$COMPOSE_FILE" ps diff --git a/docs/installation.mdx b/docs/installation.mdx index a3ee0be..7331407 100644 --- a/docs/installation.mdx +++ b/docs/installation.mdx @@ -39,15 +39,24 @@ cp .env.example .env Edit `.env` with your values (see below), then start the stack: ```bash -docker compose -f compose.production.yml up -d --pull always +docker compose -f compose.production.yml up -d --pull always --remove-orphans ``` To use the bundled PostgreSQL instead of an external database: ```bash -docker compose -f compose.postgres.yml up -d --pull always +docker compose -f compose.postgres.yml up -d --pull always --remove-orphans ``` +Production hosts should also cap Docker container logs. The installer creates +`/etc/docker/daemon.json` with `json-file` rotation on fresh hosts. If Docker is +already configured, keep your existing daemon settings and add equivalent log +rotation manually. + +The Compose files include container health checks for visibility. Plain Docker +Compose reports unhealthy containers but does not restart them automatically, so +use the common commands below when investigating a self-hosted service. + ## Environment Variables ### Required @@ -135,8 +144,12 @@ docker compose -f compose.production.yml ps docker compose -f compose.production.yml logs -f # Stop all services -docker compose -f compose.production.yml down +docker compose -f compose.production.yml down --remove-orphans -# Update to latest version -docker compose -f compose.production.yml up -d --pull always +# Update to the configured image references +docker compose -f compose.production.yml up -d --pull always --remove-orphans ``` + +Use versioned or digest-pinned image references for production updates when +possible. Mutable tags such as `latest` and `tip` are convenient, but they can +move between pulls. 
diff --git a/web/SELF-HOSTING.md b/web/SELF-HOSTING.md index d9c516c..62e4310 100644 --- a/web/SELF-HOSTING.md +++ b/web/SELF-HOSTING.md @@ -18,9 +18,13 @@ cp .env.example .env Edit `.env` with your values, then: ```bash -docker compose -f compose.production.yml up -d --build +docker compose -f compose.production.yml up -d --build --remove-orphans ``` +Production hosts should cap Docker container logs in `/etc/docker/daemon.json`. +For release deployments, prefer versioned or digest-pinned image references over +mutable tags such as `latest` or `tip`. + ## Services | Service | Endpoint | @@ -82,6 +86,6 @@ Escape `$` as `$$` in the `.env` file. ```bash docker compose -f compose.production.yml ps docker compose -f compose.production.yml logs -f -docker compose -f compose.production.yml down -docker compose -f compose.production.yml up -d --build +docker compose -f compose.production.yml down --remove-orphans +docker compose -f compose.production.yml up -d --build --remove-orphans ``` diff --git a/web/app/(dashboard)/dashboard/projects/[slug]/[env]/import-compose/import-compose-form.tsx b/web/app/(dashboard)/dashboard/projects/[slug]/[env]/import-compose/import-compose-form.tsx index 54d25af..c17d280 100644 --- a/web/app/(dashboard)/dashboard/projects/[slug]/[env]/import-compose/import-compose-form.tsx +++ b/web/app/(dashboard)/dashboard/projects/[slug]/[env]/import-compose/import-compose-form.tsx @@ -1,34 +1,34 @@ "use client"; -import { useState } from "react"; -import { useRouter } from "next/navigation"; -import Link from "next/link"; import { - FileText, - AlertTriangle, AlertCircle, + AlertTriangle, + ArrowLeft, Box, - HardDrive, Check, - ChevronRight, ChevronLeft, - ArrowLeft, + ChevronRight, + FileText, + HardDrive, } from "lucide-react"; -import { Button } from "@/components/ui/button"; -import { Label } from "@/components/ui/label"; -import { Input } from "@/components/ui/input"; -import { Textarea } from "@/components/ui/textarea"; -import { Alert, 
AlertTitle, AlertDescription } from "@/components/ui/alert"; -import { Badge } from "@/components/ui/badge"; +import Link from "next/link"; +import { useRouter } from "next/navigation"; +import { useState } from "react"; import { - parseComposeFile, importCompose, + parseComposeFile, type ServiceOverride, } from "@/actions/compose"; +import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Textarea } from "@/components/ui/textarea"; import type { ParsedService, - ParseWarning, ParseError, + ParseWarning, } from "@/lib/compose-parser"; type Step = "upload" | "preview" | "configure" | "importing" | "complete"; @@ -230,7 +230,7 @@ export function ImportComposeForm({ placeholder={`version: "3.8" services: web: - image: nginx:latest + image: nginx:1.27 ports: - "80:80"`} className="font-mono text-sm min-h-[300px]" diff --git a/web/components/service/create-service-dialog.tsx b/web/components/service/create-service-dialog.tsx index dc74668..adc10d7 100644 --- a/web/components/service/create-service-dialog.tsx +++ b/web/components/service/create-service-dialog.tsx @@ -1,26 +1,27 @@ "use client"; -import { useState } from "react"; +import { Box, Github, Plus, Upload } from "lucide-react"; +import Link from "next/link"; import { useRouter } from "next/navigation"; +import { useState } from "react"; import { useSWRConfig } from "swr"; import { createService, validateDockerImage } from "@/actions/projects"; +import { GitHubRepoSelector } from "@/components/github/github-repo-selector"; import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; import { Dialog, DialogContent, DialogHeader, DialogTitle, } from "@/components/ui/dialog"; +import { Input } from 
"@/components/ui/input"; +import { Label } from "@/components/ui/label"; import { Popover, PopoverContent, PopoverTrigger, } from "@/components/ui/popover"; -import { GitHubRepoSelector } from "@/components/github/github-repo-selector"; -import { Box, Github, Plus, Upload } from "lucide-react"; -import Link from "next/link"; +import { imageNeedsProductionPinning } from "@/lib/docker-image"; type SelectedRepo = { id?: number; @@ -184,9 +185,15 @@ export function CreateDockerServiceDialog({ setImage(e.target.value); setError(null); }} - placeholder="nginx:latest" + placeholder="nginx:1.27" /> {error &&

{error}

} + {imageNeedsProductionPinning(image.trim()) && ( +

+ Use a version tag or digest for production. Images without a tag + default to latest.

+ )}

Supported: Docker Hub, GitHub Container Registry (ghcr.io), or any public registry diff --git a/web/components/service/details/source-section.tsx b/web/components/service/details/source-section.tsx index cf0c2d6..bf8058d 100644 --- a/web/components/service/details/source-section.tsx +++ b/web/components/service/details/source-section.tsx @@ -1,11 +1,11 @@ "use client"; +import { Box, GitBranch, Github, Loader2 } from "lucide-react"; import { memo, useState } from "react"; import { toast } from "sonner"; -import { Box, GitBranch, Github, Loader2 } from "lucide-react"; import { - updateServiceGithubRepo, updateServiceConfig, + updateServiceGithubRepo, validateDockerImage, } from "@/actions/projects"; import { Button } from "@/components/ui/button"; @@ -19,6 +19,7 @@ import { } from "@/components/ui/item"; import { Label } from "@/components/ui/label"; import type { ServiceWithDetails as Service } from "@/db/types"; +import { imageNeedsProductionPinning } from "@/lib/docker-image"; function parseImageInfo(image: string): { registry: string; @@ -270,7 +271,7 @@ export const SourceSection = memo(function SourceSection({ { setImage(e.target.value); @@ -280,6 +281,12 @@ export const SourceSection = memo(function SourceSection({ {imageError && (

{imageError}

)} + {imageNeedsProductionPinning(image.trim()) && ( +

+ Use a version tag or digest for production. Images without a tag + default to latest.

+ )}

Supported: Docker Hub, GitHub Container Registry (ghcr.io), or any public registry diff --git a/web/db/schema.ts b/web/db/schema.ts index e8e83bb..757b6ed 100644 --- a/web/db/schema.ts +++ b/web/db/schema.ts @@ -456,6 +456,12 @@ export const deployments = pgTable( healthStatus: text("health_status", { enum: ["none", "starting", "healthy", "unhealthy"], }), + unhealthyReportCount: integer("unhealthy_report_count") + .notNull() + .default(0), + autohealRestartCount: integer("autoheal_restart_count") + .notNull() + .default(0), rolloutId: text("rollout_id"), previousDeploymentId: text("previous_deployment_id"), failedStage: text("failed_stage"), diff --git a/web/lib/agent-status.ts b/web/lib/agent-status.ts index ae30751..e1586ed 100644 --- a/web/lib/agent-status.ts +++ b/web/lib/agent-status.ts @@ -14,6 +14,10 @@ import { import { inngest } from "@/lib/inngest/client"; import { inngestEvents } from "@/lib/inngest/events"; import { ingestRolloutLog } from "@/lib/victoria-logs"; +import { enqueueWork } from "@/lib/work-queue"; + +const AUTOHEAL_UNHEALTHY_REPORTS = 3; +const AUTOHEAL_MAX_RESTARTS = 3; type ContainerStatus = { deploymentId: string; @@ -225,6 +229,9 @@ export async function applyStatusReport( } const updateFields: Record = { healthStatus }; + let autohealRestartPayload: Record | null = null; + let autohealFailed = false; + if (deployment.containerId !== container.containerId) { updateFields.containerId = container.containerId; } @@ -301,11 +308,69 @@ export async function applyStatusReport( ); } + const canAutoheal = + container.status === "running" && + (deployment.status === "running" || deployment.status === "healthy"); + const healthRecovered = + healthStatus === "healthy" || healthStatus === "none"; + + if (canAutoheal && healthStatus === "unhealthy") { + const unhealthyReportCount = (deployment.unhealthyReportCount ?? 
0) + 1; + updateFields.unhealthyReportCount = unhealthyReportCount; + + if (unhealthyReportCount >= AUTOHEAL_UNHEALTHY_REPORTS) { + const restartCount = deployment.autohealRestartCount ?? 0; + + if (restartCount >= AUTOHEAL_MAX_RESTARTS) { + console.log( + `[autoheal] deployment ${deployment.id} exceeded restart limit`, + ); + updateFields.status = "failed"; + updateFields.failedStage = "autoheal"; + autohealFailed = true; + } else { + console.log( + `[autoheal] restarting unhealthy deployment ${deployment.id} (${restartCount + 1}/${AUTOHEAL_MAX_RESTARTS})`, + ); + updateFields.unhealthyReportCount = 0; + updateFields.autohealRestartCount = restartCount + 1; + autohealRestartPayload = { + deploymentId: deployment.id, + containerId: container.containerId, + reason: "autoheal_unhealthy", + }; + } + } + } else if (healthRecovered) { + updateFields.unhealthyReportCount = 0; + } + await db .update(deployments) .set(updateFields) .where(eq(deployments.id, deployment.id)); + if (autohealRestartPayload) { + await enqueueWork(serverId, "restart", autohealRestartPayload); + } + + if (autohealFailed && deployment.rolloutId) { + await ingestRolloutLog( + deployment.rolloutId, + deployment.serviceId, + "autoheal", + `Deployment ${deployment.id} exceeded autoheal restart limit`, + ); + await inngest.send( + inngestEvents.deploymentFailed.create({ + deploymentId: deployment.id, + rolloutId: deployment.rolloutId, + serviceId: deployment.serviceId, + reason: "autoheal_restart_limit", + }), + ); + } + if ( deployment.status === "starting" && container.status === "running" && diff --git a/web/lib/compose-parser.ts b/web/lib/compose-parser.ts index cc82793..5adc734 100644 --- a/web/lib/compose-parser.ts +++ b/web/lib/compose-parser.ts @@ -1,5 +1,9 @@ import { parse as parseYaml } from "yaml"; import { z } from "zod"; +import { + imageIsUnqualified, + imageUsesMutableReference, +} from "@/lib/docker-image"; import { formatZodErrors } from "@/lib/utils"; const composeHealthcheckSchema = 
z.object({ @@ -318,6 +322,10 @@ function parseHealthcheck( }; } +function volumeMentionsDockerSocket(volume: string): boolean { + return volume.split(":").some((part) => part === "/var/run/docker.sock"); +} + function quoteIfNeeded(arg: string): string { if (arg.includes(" ") || arg.includes('"') || arg.includes("'")) { return `"${arg.replace(/"/g, '\\"')}"`; @@ -435,6 +443,24 @@ export function parseComposeYaml(yamlContent: string): ComposeParseResult { }); } + if (imageIsUnqualified(serviceConfig.image)) { + warnings.push({ + service: serviceName, + field: "image", + message: + "Image uses Docker Hub shorthand. Prefer a fully qualified registry path for production.", + }); + } + + if (imageUsesMutableReference(serviceConfig.image)) { + warnings.push({ + service: serviceName, + field: "image", + message: + "Image uses a mutable tag or implicit latest. Pin a version tag or digest for production deploys.", + }); + } + if (serviceConfig.depends_on) { warnings.push({ service: serviceName, @@ -478,7 +504,7 @@ export function parseComposeYaml(yamlContent: string): ComposeParseResult { service: serviceName, field: "restart", message: - "Restart policy ignored. Platform manages restarts automatically.", + "Restart policy ignored. 
The platform manages lifecycle, health, and restart behavior automatically.", }); } @@ -488,20 +514,28 @@ export function parseComposeYaml(yamlContent: string): ComposeParseResult { if (result.error) { errors.push({ service: serviceName, message: result.error }); } else if (result.port) { + const port = result.port; if ( !parsedPorts.some( - (p) => - p.port === result.port!.port && - p.protocol === result.port!.protocol, + (p) => p.port === port.port && p.protocol === port.protocol, ) ) { - parsedPorts.push(result.port); + parsedPorts.push(port); } } } const parsedVolumes: ParsedVolume[] = []; for (const volumeDef of serviceConfig.volumes || []) { + if (volumeMentionsDockerSocket(volumeDef)) { + warnings.push({ + service: serviceName, + field: "volumes", + message: + "Docker socket mount ignored. Mounting /var/run/docker.sock grants host-level Docker control.", + }); + } + const result = parseVolume(volumeDef, serviceName, definedVolumes); if (result.error) { errors.push({ service: serviceName, message: result.error }); @@ -512,8 +546,9 @@ export function parseComposeYaml(yamlContent: string): ComposeParseResult { message: result.warning, }); } else if (result.volume) { - if (!parsedVolumes.some((v) => v.name === result.volume!.name)) { - parsedVolumes.push(result.volume); + const volume = result.volume; + if (!parsedVolumes.some((v) => v.name === volume.name)) { + parsedVolumes.push(volume); } } } diff --git a/web/lib/docker-image.ts b/web/lib/docker-image.ts new file mode 100644 index 0000000..2bf3dfb --- /dev/null +++ b/web/lib/docker-image.ts @@ -0,0 +1,18 @@ +export function imageUsesMutableReference(image: string): boolean { + if (image.includes("@")) return false; + + const lastSlash = image.lastIndexOf("/"); + const lastColon = image.lastIndexOf(":"); + if (lastColon <= lastSlash) return true; + + return image.slice(lastColon + 1) === "latest"; +} + +export function imageIsUnqualified(image: string): boolean { + const imageWithoutDigest = 
image.split("@")[0]; + return !imageWithoutDigest.includes("/"); +} + +export function imageNeedsProductionPinning(image: string): boolean { + return image !== "" && imageUsesMutableReference(image); +}