From 6748ed863932c3cf67491aaadeb6fc2bb91468c3 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Wed, 11 Feb 2026 11:37:15 -0800
Subject: [PATCH 01/12] feat: db and schema updates for new retry logic

chose to name it failedSendAttemptCount as the nextSendRetryAt can only be set when a send attempt fails.
nextSendRetryAt controls the retry logic.
sendAttemptErrors is named as such because it tracks all of the send errors not just retry errors.
We also put in api schema updates and type updates.
---
 .../migration.sql                             | 25 +++++++++++++++++++
 apps/backend/prisma/schema.prisma             | 14 ++++++++---
 .../src/interface/crud/email-outbox.ts        | 13 ++++++++++
 .../apps/implementations/admin-app-impl.ts    | 18 +++++++++++++
 .../template/src/lib/stack-app/email/index.ts | 14 +++++++++++
 packages/template/src/lib/stack-app/index.ts  |  1 +
 6 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql

diff --git a/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql b/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
new file mode 100644
index 0000000000..b8f3adc729
--- /dev/null
+++ b/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
@@ -0,0 +1,25 @@
+-- Add deferred retry fields for email sending
+-- These fields allow the email queue to schedule retries for later iterations
+-- instead of blocking the current iteration with inline retries.
+
+ALTER TABLE "EmailOutbox"
+  ADD COLUMN "failedSendAttemptCount" INTEGER NOT NULL DEFAULT 0,
+  ADD COLUMN "nextSendRetryAt" TIMESTAMP(3),
+  ADD COLUMN "sendAttemptErrors" JSONB;
+
+-- Constraint: nextSendRetryAt can only be set after at least one failed attempt
+-- (if failedSendAttemptCount is 0, no attempt has failed, so there's nothing to retry)
+ALTER TABLE "EmailOutbox"
+  ADD CONSTRAINT "EmailOutbox_nextSendRetryAt_requires_failure"
+  CHECK ("nextSendRetryAt" IS NULL OR "failedSendAttemptCount" > 0);
+
+-- Constraint: sendAttemptErrors can only be set after at least one failed attempt
+ALTER TABLE "EmailOutbox"
+  ADD CONSTRAINT "EmailOutbox_sendAttemptErrors_requires_failure"
+  CHECK ("sendAttemptErrors" IS NULL OR "failedSendAttemptCount" > 0);
+
+-- Constraint: nextSendRetryAt must be null when email has finished sending
+-- (if finishedSendingAt is set, there's nothing more to retry)
+ALTER TABLE "EmailOutbox"
+  ADD CONSTRAINT "EmailOutbox_no_retry_after_finished"
+  CHECK ("finishedSendingAt" IS NULL OR "nextSendRetryAt" IS NULL);
diff --git a/apps/backend/prisma/schema.prisma b/apps/backend/prisma/schema.prisma
index 21b781e81a..7712ed8713 100644
--- a/apps/backend/prisma/schema.prisma
+++ b/apps/backend/prisma/schema.prisma
@@ -99,8 +99,8 @@ model ExternalDbSyncMetadata {
 
   singleton BooleanTrue @unique @default(TRUE)
 
-  sequencerEnabled  Boolean @default(true)
-  pollerEnabled     Boolean @default(true)
+  sequencerEnabled Boolean @default(true)
+  pollerEnabled    Boolean @default(true)
 
   createdAt DateTime @default(now())
   updatedAt DateTime @updatedAt
@@ -844,6 +844,14 @@ model EmailOutbox {
   // if startedSendingAt is not set, then finishedSendingAt is also not set
   finishedSendingAt DateTime?
 
+  // Deferred retry fields for email sending
+  // Number of failed send attempts (starts at 0, incremented on each failure). Reset when email content is edited.
+  failedSendAttemptCount Int       @default(0)
+  // When to retry sending (null = not waiting for retry). Set when a retryable error occurs. Reset when email content is edited. Must be null if failedSendAttemptCount is 0 (enforced by EmailOutbox_nextSendRetryAt_requires_failure). Must be null when finishedSendingAt is set (enforced by EmailOutbox_no_retry_after_finished).
+  nextSendRetryAt        DateTime?
+  // JSON array of errors from each failed send attempt. Each entry has: { attemptNumber, timestamp, externalMessage, externalDetails, internalMessage, internalDetails }. Reset when email content is edited. Must be null if failedSendAttemptCount is 0 (enforced by EmailOutbox_sendAttemptErrors_requires_failure).
+  sendAttemptErrors      Json?
+
   // A generated column that is equal to finishedSendingAt if canHaveDeliveryInfo is false, otherwise deliveredAt.
   sentAt DateTime? @default(dbgenerated("\nCASE\n    WHEN (\"canHaveDeliveryInfo\" IS TRUE) THEN \"deliveredAt\"\n    WHEN (\"canHaveDeliveryInfo\" IS FALSE) THEN \"finishedSendingAt\"\n    ELSE NULL::timestamp without time zone\nEND"))
 
@@ -1088,7 +1096,7 @@ model OutgoingRequest {
 
   qstashOptions       Json
   startedFulfillingAt DateTime?
-  deduplicationKey String?
+  deduplicationKey    String?
 
   // Partial unique index on deduplicationKey WHERE startedFulfillingAt IS NULL
   // is created in a custom migration (not expressible in Prisma schema)
diff --git a/packages/stack-shared/src/interface/crud/email-outbox.ts b/packages/stack-shared/src/interface/crud/email-outbox.ts
index 3629f81b7e..ddc785bd83 100644
--- a/packages/stack-shared/src/interface/crud/email-outbox.ts
+++ b/packages/stack-shared/src/interface/crud/email-outbox.ts
@@ -40,6 +40,19 @@ const emailOutboxBaseSchema = fieldSchema.yupObject({
   skip_deliverability_check: fieldSchema.yupBoolean().defined(),
   scheduled_at_millis: fieldSchema.yupNumber().defined(),
 
+  // Retry-related fields (for debugging/testing deferred retry logic)
+  failed_send_attempt_count: fieldSchema.yupNumber().defined(),
+  next_send_retry_at_millis: fieldSchema.yupNumber().nullable().defined(),
+  // Array of errors from each failed send attempt, each with internal/external messages
+  send_attempt_errors: fieldSchema.yupArray(fieldSchema.yupObject({
+    attempt_number: fieldSchema.yupNumber().defined(),
+    timestamp: fieldSchema.yupString().defined(),
+    external_message: fieldSchema.yupString().defined(),
+    external_details: fieldSchema.yupRecord(fieldSchema.yupString(), fieldSchema.jsonSchema).defined(),
+    internal_message: fieldSchema.yupString().defined(),
+    internal_details: fieldSchema.yupRecord(fieldSchema.yupString(), fieldSchema.jsonSchema).defined(),
+  }).defined()).nullable().defined(),
+
   status: fieldSchema.yupString().defined(),
   simple_status: fieldSchema.yupString().defined(),
 
diff --git a/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts b/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts
index 8f9b331b21..9dd25fe9a4 100644
--- a/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts
+++ b/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts
@@ -733,6 +733,24 @@ export class _StackAdminAppImplIncomplete<HasTokenStore extends boolean, Project
       isPaused: false as const,
       hasRendered: false as const,
       hasDelivered: false as const,
+      // Retry tracking fields
+      failedSendAttemptCount: crud.failed_send_attempt_count as number,
+      nextSendRetryAt: crud.next_send_retry_at_millis ? new Date(crud.next_send_retry_at_millis) : null,
+      sendAttemptErrors: crud.send_attempt_errors ? (crud.send_attempt_errors as Array<{
+        attempt_number: number,
+        timestamp: string,
+        external_message: string,
+        external_details: Record<string, unknown>,
+        internal_message: string,
+        internal_details: Record<string, unknown>,
+      }>).map((e) => ({
+        attemptNumber: e.attempt_number,
+        timestamp: e.timestamp,
+        externalMessage: e.external_message,
+        externalDetails: e.external_details,
+        internalMessage: e.internal_message,
+        internalDetails: e.internal_details,
+      })) : null,
     };
 
     // Rendered fields (available after rendering completes successfully)
diff --git a/packages/template/src/lib/stack-app/email/index.ts b/packages/template/src/lib/stack-app/email/index.ts
index 36a73e5ba8..c3cb3e8d51 100644
--- a/packages/template/src/lib/stack-app/email/index.ts
+++ b/packages/template/src/lib/stack-app/email/index.ts
@@ -36,6 +36,16 @@ export type AdminEmailOutboxSimpleStatus =
   | "ok"
   | "error";
 
+// Error entry from a failed send attempt
+export type AdminSendAttemptError = {
+  attemptNumber: number,
+  timestamp: string,
+  externalMessage: string,
+  externalDetails: Record<string, unknown>,
+  internalMessage: string,
+  internalDetails: Record<string, unknown>,
+};
+
 // =============================== BASE TYPES ===============================
 
 // Base fields present on all emails
@@ -48,6 +58,10 @@ type AdminEmailOutboxBase = {
   isPaused: false,
   hasRendered: false,
   hasDelivered: false,
+  // Retry tracking fields
+  failedSendAttemptCount: number,
+  nextSendRetryAt: Date | null,
+  sendAttemptErrors: AdminSendAttemptError[] | null,
 };
 
 // Fields available after rendering completes successfully
diff --git a/packages/template/src/lib/stack-app/index.ts b/packages/template/src/lib/stack-app/index.ts
index 87bf010a0f..01f9b11af1 100644
--- a/packages/template/src/lib/stack-app/index.ts
+++ b/packages/template/src/lib/stack-app/index.ts
@@ -56,6 +56,7 @@ export type {
   AdminEmailOutboxRecipient,
   AdminEmailOutboxSimpleStatus,
   AdminEmailOutboxStatus,
+  AdminSendAttemptError,
   AdminSentEmail
 } from "./email";
 

From 87b1c9c092ba6cee4ffeca36a17f37f92b34098a Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Wed, 11 Feb 2026 12:52:58 -0800
Subject: [PATCH 02/12] refactor: remove low-level email retry

Spending too much time on retries in the low level email sending clogs the email-queue-step.
This can lead to cascading delays if there is an outage with our sender provider.
This will cause a vercel runtime failure.
Removing the retry logic also obviates the need for separate funcs.
---
 apps/backend/src/lib/emails-low-level.tsx | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/apps/backend/src/lib/emails-low-level.tsx b/apps/backend/src/lib/emails-low-level.tsx
index a91e31744c..a2d0e6240b 100644
--- a/apps/backend/src/lib/emails-low-level.tsx
+++ b/apps/backend/src/lib/emails-low-level.tsx
@@ -214,7 +214,23 @@ export async function lowLevelSendEmailDirectWithoutRetries(options: LowLevelSen
   canRetry: boolean,
   message?: string,
 }>> {
-  return await _lowLevelSendEmailWithoutRetries(options);
+  if (!options.to) {
+    throw new StackAssertionError("No recipient email address provided to sendEmail", omit(options, ['emailConfig']));
+  }
+
+  const result = await _lowLevelSendEmailWithoutRetries(options);
+
+  if (result.status === 'error') {
+    console.warn("Failed to send email.", {
+      host: options.emailConfig.host,
+      from: options.emailConfig.senderEmail,
+      to: options.to,
+      subject: options.subject,
+      error: result.error,
+    }, result.error.rawError);
+  }
+
+  return result;
 }
 
 // currently unused, although in the future we may want to use this to minimize the number of requests to Resend

From ace7319471ab7b00489035ab64acf3579e224092 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Wed, 11 Feb 2026 13:10:36 -0800
Subject: [PATCH 03/12] refactor: nix unused low-level resend func

The comment indicates this isn't used anywhere else.
There are no uses of it in our codebase.
---
 apps/backend/src/lib/emails-low-level.tsx | 100 ----------------------
 1 file changed, 100 deletions(-)

diff --git a/apps/backend/src/lib/emails-low-level.tsx b/apps/backend/src/lib/emails-low-level.tsx
index a2d0e6240b..0183c28a70 100644
--- a/apps/backend/src/lib/emails-low-level.tsx
+++ b/apps/backend/src/lib/emails-low-level.tsx
@@ -10,8 +10,6 @@ import { runAsynchronously, wait } from '@stackframe/stack-shared/dist/utils/pro
 import { Result } from '@stackframe/stack-shared/dist/utils/results';
 import { traceSpan } from '@stackframe/stack-shared/dist/utils/telemetry';
 import nodemailer from 'nodemailer';
-import { Resend } from 'resend';
-import { getTenancy } from './tenancies';
 
 export function isSecureEmailPort(port: number | string) {
   // "secure" in most SMTP clients means implicit TLS from byte 1 (SMTPS)
@@ -232,101 +230,3 @@ export async function lowLevelSendEmailDirectWithoutRetries(options: LowLevelSen
 
   return result;
 }
-
-// currently unused, although in the future we may want to use this to minimize the number of requests to Resend
-export async function lowLevelSendEmailResendBatchedDirect(resendApiKey: string, emailOptions: LowLevelSendEmailOptions[]) {
-  if (emailOptions.length === 0) {
-    return Result.ok([]);
-  }
-  if (emailOptions.length > 100) {
-    throw new StackAssertionError("sendEmailResendBatchedDirect expects at most 100 emails to be sent at once", { emailOptions });
-  }
-  if (emailOptions.some(option => option.tenancyId !== emailOptions[0].tenancyId)) {
-    throw new StackAssertionError("sendEmailResendBatchedDirect expects all emails to be sent from the same tenancy", { emailOptions });
-  }
-  const tenancy = await getTenancy(emailOptions[0].tenancyId);
-  if (!tenancy) {
-    throw new StackAssertionError("Tenancy not found");
-  }
-  const resend = new Resend(resendApiKey);
-  const result = await Result.retry(async (_) => {
-    const { data, error } = await resend.batch.send(emailOptions.map((option) => ({
-      from: option.emailConfig.senderEmail,
-      to: option.to,
-      subject: option.subject,
-      html: option.html ?? "",
-      text: option.text,
-    })));
-
-    if (data) {
-      return Result.ok(data.data);
-    }
-    if (error.name === "rate_limit_exceeded" || error.name === "internal_server_error") {
-      // these are the errors we want to retry
-      return Result.error(error);
-    }
-    throw new StackAssertionError("Failed to send email with Resend", { error });
-  }, 3, { exponentialDelayBase: 2000 });
-
-  return result;
-}
-
-export async function lowLevelSendEmailDirectViaProvider(options: LowLevelSendEmailOptions): Promise<Result<undefined, {
-  rawError: any,
-  errorType: string,
-  canRetry: boolean,
-  message?: string,
-}>> {
-  if (!options.to) {
-    throw new StackAssertionError("No recipient email address provided to sendEmail", omit(options, ['emailConfig']));
-  }
-
-  class DoNotRetryError extends Error {
-    constructor(public readonly errorObj: {
-      rawError: any,
-      errorType: string,
-      canRetry: boolean,
-      message?: string,
-    }) {
-      super("This error should never be caught anywhere else but inside the lowLevelSendEmailDirectViaProvider function, something went wrong if you see this!");
-    }
-  }
-
-  let result;
-  try {
-    result = await Result.retry(async (attempt) => {
-      const result = await lowLevelSendEmailDirectWithoutRetries(options);
-
-      if (result.status === 'error') {
-        const extraData = {
-          host: options.emailConfig.host,
-          from: options.emailConfig.senderEmail,
-          to: options.to,
-          subject: options.subject,
-          error: result.error,
-        };
-
-        if (result.error.canRetry) {
-          console.warn("Failed to send email, but error is possibly transient so retrying.", extraData, result.error.rawError);
-          return Result.error(result.error);
-        }
-
-        console.warn("Failed to send email, and error is not transient, so not retrying.", extraData, result.error.rawError);
-        throw new DoNotRetryError(result.error);
-      }
-
-      return result;
-    }, 9, { exponentialDelayBase: 125 });
-  } catch (error) {
-    if (error instanceof DoNotRetryError) {
-      return Result.error(error.errorObj);
-    }
-    throw error;
-  }
-
-  if (result.status === 'error') {
-    console.warn("Failed to send email after all retries!", result.error);
-    return Result.error(result.error.errors[0]);
-  }
-  return Result.ok(undefined);
-}

From 4897f62844627b15e7e2da8d1f15ae9727131cdf Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Wed, 11 Feb 2026 19:10:54 -0800
Subject: [PATCH 04/12] feat: rework email queue step retry logic

Now we mark emails to be retried in one iteration,
 and retry them in the next iteration.
---
 .../src/app/api/latest/emails/outbox/crud.tsx |  29 ++
 apps/backend/src/lib/email-queue-step.tsx     | 149 ++++++--
 .../api/v1/emails/email-queue.test.ts         | 336 +++++++++++++++++-
 3 files changed, 491 insertions(+), 23 deletions(-)

diff --git a/apps/backend/src/app/api/latest/emails/outbox/crud.tsx b/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
index a4aa26a916..42d76eec31 100644
--- a/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
+++ b/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
@@ -7,6 +7,7 @@ import { KnownErrors } from "@stackframe/stack-shared";
 import { emailOutboxCrud, EmailOutboxCrud } from "@stackframe/stack-shared/dist/interface/crud/email-outbox";
 import { yupObject, yupString } from "@stackframe/stack-shared/dist/schema-fields";
 import { StackAssertionError, StatusError, throwErr } from "@stackframe/stack-shared/dist/utils/errors";
+import { Json } from "@stackframe/stack-shared/dist/utils/json";
 import { createLazyProxy } from "@stackframe/stack-shared/dist/utils/proxies";
 
 /**
@@ -57,6 +58,25 @@ function prismaModelToCrud(prismaModel: EmailOutbox): EmailOutboxCrud["Server"][
     to = { type: "custom-emails", emails: recipient?.emails ?? [] };
   }
 
+  // Convert sendAttemptErrors from DB format (camelCase) to API format (snake_case)
+  const sendAttemptErrors = prismaModel.sendAttemptErrors
+    ? (prismaModel.sendAttemptErrors as Array<{
+        attemptNumber: number,
+        timestamp: string,
+        externalMessage: string,
+        externalDetails: Record<string, Json>,
+        internalMessage: string,
+        internalDetails: Record<string, Json>,
+      }>).map(e => ({
+        attempt_number: e.attemptNumber,
+        timestamp: e.timestamp,
+        external_message: e.externalMessage,
+        external_details: e.externalDetails,
+        internal_message: e.internalMessage,
+        internal_details: e.internalDetails,
+      }))
+    : null;
+
   // Base fields present on all emails
   const base = {
     id: prismaModel.id,
@@ -68,6 +88,9 @@ function prismaModelToCrud(prismaModel: EmailOutbox): EmailOutboxCrud["Server"][
     variables: (prismaModel.extraRenderVariables ?? {}) as Record<string, any>,
     skip_deliverability_check: prismaModel.shouldSkipDeliverabilityCheck,
     scheduled_at_millis: prismaModel.scheduledAt.getTime(),
+    failed_send_attempt_count: prismaModel.failedSendAttemptCount,
+    next_send_retry_at_millis: prismaModel.nextSendRetryAt?.getTime() ?? null,
+    send_attempt_errors: sendAttemptErrors,
     // Default flags (overridden in specific statuses)
     is_paused: false,
     has_rendered: false,
@@ -395,6 +418,8 @@ export const emailOutboxCrudHandlers = createLazyProxy(() => createCrudHandlers(
       // If content changed, reset rendering and sending state
       if (needsRerenderReset) {
         set("isQueued", Prisma.sql`false`);
+        // Reset retry fields (failedSendAttemptCount to 0, others to null)
+        set("failedSendAttemptCount", Prisma.sql`0`);
         setNull(
           "renderedByWorkerId", "startedRenderingAt", "finishedRenderingAt",
           "renderErrorExternalMessage", "renderErrorExternalDetails",
@@ -402,6 +427,7 @@ export const emailOutboxCrudHandlers = createLazyProxy(() => createCrudHandlers(
           "renderedHtml", "renderedText", "renderedSubject",
           "renderedIsTransactional", "renderedNotificationCategoryId",
           "startedSendingAt", "finishedSendingAt",
+          "nextSendRetryAt", "sendAttemptErrors",
           "sendServerErrorExternalMessage", "sendServerErrorExternalDetails",
           "sendServerErrorInternalMessage", "sendServerErrorInternalDetails",
           "skippedReason", "skippedDetails", "canHaveDeliveryInfo",
@@ -494,6 +520,9 @@ function parseEmailOutboxFromJson(j: Record<string, unknown>): EmailOutbox {
     scheduledAtIfNotYetQueued: dateOrNull("scheduledAtIfNotYetQueued"),
     startedSendingAt: dateOrNull("startedSendingAt"),
     finishedSendingAt: dateOrNull("finishedSendingAt"),
+    failedSendAttemptCount: j.failedSendAttemptCount as number,
+    nextSendRetryAt: dateOrNull("nextSendRetryAt"),
+    sendAttemptErrors: j.sendAttemptErrors as Prisma.JsonValue,
     sentAt: dateOrNull("sentAt"),
     sendServerErrorExternalMessage: j.sendServerErrorExternalMessage as string | null,
     sendServerErrorExternalDetails: j.sendServerErrorExternalDetails as Prisma.JsonValue,
diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index 0d72ea2e1f..e82b22d7a9 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -15,10 +15,40 @@ import { filterUndefined } from "@stackframe/stack-shared/dist/utils/objects";
 import { Result } from "@stackframe/stack-shared/dist/utils/results";
 import { traceSpan } from "@stackframe/stack-shared/dist/utils/telemetry";
 import { randomUUID } from "node:crypto";
-import { lowLevelSendEmailDirectViaProvider } from "./emails-low-level";
+import { lowLevelSendEmailDirectWithoutRetries } from "./emails-low-level";
 
 const MAX_RENDER_BATCH = 50;
 
+const MAX_SEND_ATTEMPTS = 3;
+
+const RETRY_BACKOFF_BASE_MS = 2000;
+
+const calculateRetryBackoffMs = (attemptCount: number): number => {
+  return (Math.random() + 0.5) * RETRY_BACKOFF_BASE_MS * Math.pow(2, attemptCount);
+};
+
+/**
+ * Structure for tracking errors from each send attempt.
+ * Mirrors the pattern used for sendServerError* fields.
+ * Uses Prisma.InputJsonValue-compatible types for DB storage.
+ */
+type SendAttemptError = {
+  attemptNumber: number,
+  timestamp: string,
+  externalMessage: string,
+  externalDetails: Prisma.InputJsonObject,
+  internalMessage: string,
+  internalDetails: Prisma.InputJsonObject,
+};
+
+const appendSendAttemptError =(
+  existingErrors: SendAttemptError[] | null | undefined,
+  newError: SendAttemptError
+): SendAttemptError[] => {
+  const errors = existingErrors ?? [];
+  return [...errors, newError];
+};
+
 // Track if email queue has run at least once since server start (used to suppress first-run delta warnings in dev)
 const emailQueueFirstRunKey = Symbol.for("__stack_email_queue_first_run_completed");
 
@@ -480,6 +510,7 @@ async function queueReadyEmails(): Promise<{ queuedCount: number }> {
       AND "finishedRenderingAt" IS NOT NULL
       AND "renderedHtml" IS NOT NULL
       AND "scheduledAt" <= NOW()
+      AND "nextSendRetryAt" IS NULL
     RETURNING "id";
   `;
   return {
@@ -488,11 +519,16 @@ async function queueReadyEmails(): Promise<{ queuedCount: number }> {
 }
 
 async function prepareSendPlan(deltaSeconds: number): Promise<TenancySendBatch[]> {
+  // Find tenancies with emails ready to send (either new emails or emails ready for retry)
   const tenancyIds = await globalPrismaClient.emailOutbox.findMany({
     where: {
-      isQueued: true,
       isPaused: false,
-      startedSendingAt: null,
+      OR: [
+        // Normal case: queued, not started, and no pending retry
+        { startedSendingAt: null, isQueued: true, nextSendRetryAt: null },
+        // Retry case: past retry time
+        { nextSendRetryAt: { lte: new Date() } },
+      ],
     },
     distinct: ["tenancyId"],
     select: { tenancyId: true },
@@ -523,16 +559,23 @@ async function claimEmailsForSending(tx: PrismaClientTransaction, tenancyId: str
       SELECT "tenancyId", "id"
       FROM "EmailOutbox"
       WHERE "tenancyId" = ${tenancyId}::uuid
-        AND "isQueued" = TRUE
         AND "isPaused" = FALSE
         AND "finishedRenderingAt" IS NOT NULL
-        AND "startedSendingAt" IS NULL
+        AND (
+          -- Normal case: queued, not started, and no pending retry
+          ("startedSendingAt" IS NULL AND "isQueued" = TRUE AND "nextSendRetryAt" IS NULL)
+          OR
+          -- Retry case: past retry time
+          ("nextSendRetryAt" IS NOT NULL AND "nextSendRetryAt" <= NOW())
+        )
       ORDER BY "priority" DESC, "scheduledAt" ASC, "createdAt" ASC
       LIMIT ${limit}
       FOR UPDATE SKIP LOCKED
     )
     UPDATE "EmailOutbox" AS e
-    SET "startedSendingAt" = NOW()
+    SET 
+      "startedSendingAt" = NOW(),
+      "nextSendRetryAt" = NULL
     FROM selected
     WHERE e."tenancyId" = selected."tenancyId" AND e."id" = selected."id"
     RETURNING e.*;
@@ -640,7 +683,7 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
 
     const result = getEnvBoolean("STACK_EMAIL_BRANCHING_DISABLE_QUEUE_SENDING")
       ? Result.error({ errorType: "email-sending-disabled", canRetry: false, message: "Email sending is disabled", rawError: new Error("Email sending is disabled") })
-      : await lowLevelSendEmailDirectViaProvider({
+      : await lowLevelSendEmailDirectWithoutRetries({
         tenancyId: context.tenancy.id,
         emailConfig: context.emailConfig,
         to: resolution.emails,
@@ -650,24 +693,86 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
       });
 
     if (result.status === "error") {
-      await globalPrismaClient.emailOutbox.update({
-        where: {
-          tenancyId_id: {
+      const newAttemptCount = row.failedSendAttemptCount + 1;
+      const canRetry = result.error.canRetry && newAttemptCount < MAX_SEND_ATTEMPTS;
+
+      // Build error entry for this attempt
+      const errorEntry: SendAttemptError = {
+        attemptNumber: newAttemptCount,
+        timestamp: new Date().toISOString(),
+        externalMessage: result.error.message ?? result.error.errorType,
+        externalDetails: { errorType: result.error.errorType },
+        internalMessage: result.error.message ?? result.error.errorType,
+        internalDetails: { rawError: errorToNiceString(result.error.rawError), errorType: result.error.errorType },
+      };
+      const updatedErrors = appendSendAttemptError(row.sendAttemptErrors as SendAttemptError[] | null, errorEntry);
+
+      if (canRetry) {
+        // Schedule retry: unclaim the email and set nextSendRetryAt
+        const backoffMs = calculateRetryBackoffMs(newAttemptCount);
+        await globalPrismaClient.emailOutbox.update({
+          where: {
+            tenancyId_id: {
+              tenancyId: row.tenancyId,
+              id: row.id,
+            },
+            finishedSendingAt: null,
+          },
+          data: {
+            startedSendingAt: null, // Unclaim the email
+            isQueued: false, // Prevent normal queue path from picking it up
+            failedSendAttemptCount: newAttemptCount,
+            nextSendRetryAt: new Date(Date.now() + backoffMs),
+            sendAttemptErrors: updatedErrors as Prisma.InputJsonArray,
+          },
+        });
+      } else {
+        // Mark as permanent failure - distinguish between "attempts exhausted" and "permanent error from provider"
+        const isAttemptsExhausted = result.error.canRetry && newAttemptCount >= MAX_SEND_ATTEMPTS;
+        const failureReason = isAttemptsExhausted ? "attempts_exhausted" : "permanent_error";
+
+        if (isAttemptsExhausted) {
+          captureError("email-queue-step-retries-exhausted", new StackAssertionError(`Email failed after ${newAttemptCount} attempts`, {
+            emailId: row.id,
             tenancyId: row.tenancyId,
-            id: row.id,
+            errorType: result.error.errorType,
+            errorMessage: result.error.message,
+            allAttemptErrors: updatedErrors,
+          }));
+        }
+
+        const externalMessage = isAttemptsExhausted
+          ? "Email could not be delivered after multiple attempts. Please verify your email configuration and try again."
+          : result.error.message;
+
+        await globalPrismaClient.emailOutbox.update({
+          where: {
+            tenancyId_id: {
+              tenancyId: row.tenancyId,
+              id: row.id,
+            },
+            finishedSendingAt: null,
           },
-          finishedSendingAt: null,
-        },
-        data: {
-          finishedSendingAt: new Date(),
-          canHaveDeliveryInfo: false,
-          sendServerErrorExternalMessage: result.error.message,
-          sendServerErrorExternalDetails: { errorType: result.error.errorType },
-          sendServerErrorInternalMessage: result.error.message,
-          sendServerErrorInternalDetails: { rawError: errorToNiceString(result.error.rawError), errorType: result.error.errorType },
-        },
-      });
+          data: {
+            finishedSendingAt: new Date(),
+            canHaveDeliveryInfo: false,
+            failedSendAttemptCount: newAttemptCount,
+            sendAttemptErrors: updatedErrors as Prisma.InputJsonArray,
+            sendServerErrorExternalMessage: externalMessage,
+            sendServerErrorExternalDetails: { errorType: result.error.errorType },
+            sendServerErrorInternalMessage: result.error.message,
+            sendServerErrorInternalDetails: {
+              rawError: errorToNiceString(result.error.rawError),
+              errorType: result.error.errorType,
+              attemptCount: newAttemptCount,
+              failureReason,
+              allAttemptErrors: updatedErrors as Json[],
+            },
+          },
+        });
+      }
     } else {
+      // Success - mark as sent (don't increment failedSendAttemptCount since this wasn't a failure)
       await globalPrismaClient.emailOutbox.update({
         where: {
           tenancyId_id: {
diff --git a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
index 2bb050fd4f..adf1d8f2fa 100644
--- a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
+++ b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
@@ -1,7 +1,8 @@
 import { wait } from "@stackframe/stack-shared/dist/utils/promises";
 import { deindent, nicify } from "@stackframe/stack-shared/dist/utils/strings";
 import beautify from "js-beautify";
-import { describe } from "vitest";
+import * as net from "net";
+import { afterAll, beforeAll, describe } from "vitest";
 import { it, logIfTestFails } from "../../../../../helpers";
 import { withPortPrefix } from "../../../../../helpers/ports";
 import { Auth, Project, User, backendContext, bumpEmailAddress, getOutboxEmails, niceBackendFetch, waitForOutboxEmailWithStatus } from "../../../../backend-helpers";
@@ -412,6 +413,7 @@ describe("send email to all users", () => {
           "can_have_delivery_info": false,
           "created_at_millis": <stripped field 'created_at_millis'>,
           "delivered_at_millis": <stripped field 'delivered_at_millis'>,
+          "failed_send_attempt_count": 0,
           "has_delivered": true,
           "has_rendered": true,
           "html": "<!DOCTYPE html PUBLIC \\"-//W3C//DTD XHTML 1.0 Transitional//EN\\" \\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\\"><html dir=\\"ltr\\" lang=\\"en\\"><head><meta content=\\"text/html; charset=UTF-8\\" http-equiv=\\"Content-Type\\"/><meta name=\\"x-apple-disable-message-reformatting\\"/></head><body style=\\"background-color:rgb(250,251,251)\\"><!--$--><table border=\\"0\\" width=\\"100%\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" align=\\"center\\"><tbody><tr><td style=\\"background-color:rgb(250,251,251);font-family:ui-sans-serif,system-ui,sans-serif,&quot;Apple Color Emoji&quot;,&quot;Segoe UI Emoji&quot;,&quot;Segoe UI Symbol&quot;,&quot;Noto Color Emoji&quot;;font-size:1rem;line-height:1.5\\"><table align=\\"center\\" width=\\"100%\\" border=\\"0\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" style=\\"max-width:37.5em;background-color:rgb(255,255,255);padding:45px;border-radius:0.5rem\\"><tbody><tr style=\\"width:100%\\"><td><div><p>All users test</p></div></td></tr></tbody></table></td></tr></tbody></table><!--7--><!--/$--></body></html>",
@@ -419,9 +421,11 @@ describe("send email to all users", () => {
           "is_high_priority": false,
           "is_paused": false,
           "is_transactional": true,
+          "next_send_retry_at_millis": null,
           "notification_category_id": "<stripped UUID>",
           "rendered_at_millis": <stripped field 'rendered_at_millis'>,
           "scheduled_at_millis": <stripped field 'scheduled_at_millis'>,
+          "send_attempt_errors": null,
           "simple_status": "ok",
           "skip_deliverability_check": false,
           "started_rendering_at_millis": <stripped field 'started_rendering_at_millis'>,
@@ -449,6 +453,7 @@ describe("send email to all users", () => {
           "can_have_delivery_info": false,
           "created_at_millis": <stripped field 'created_at_millis'>,
           "delivered_at_millis": <stripped field 'delivered_at_millis'>,
+          "failed_send_attempt_count": 0,
           "has_delivered": true,
           "has_rendered": true,
           "html": "<!DOCTYPE html PUBLIC \\"-//W3C//DTD XHTML 1.0 Transitional//EN\\" \\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\\"><html dir=\\"ltr\\" lang=\\"en\\"><head><meta content=\\"text/html; charset=UTF-8\\" http-equiv=\\"Content-Type\\"/><meta name=\\"x-apple-disable-message-reformatting\\"/></head><body style=\\"background-color:rgb(250,251,251)\\"><!--$--><table border=\\"0\\" width=\\"100%\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" align=\\"center\\"><tbody><tr><td style=\\"background-color:rgb(250,251,251);font-family:ui-sans-serif,system-ui,sans-serif,&quot;Apple Color Emoji&quot;,&quot;Segoe UI Emoji&quot;,&quot;Segoe UI Symbol&quot;,&quot;Noto Color Emoji&quot;;font-size:1rem;line-height:1.5\\"><table align=\\"center\\" width=\\"100%\\" border=\\"0\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" style=\\"max-width:37.5em;background-color:rgb(255,255,255);padding:45px;border-radius:0.5rem\\"><tbody><tr style=\\"width:100%\\"><td><div><p>All users test</p></div></td></tr></tbody></table></td></tr></tbody></table><!--7--><!--/$--></body></html>",
@@ -456,9 +461,11 @@ describe("send email to all users", () => {
           "is_high_priority": false,
           "is_paused": false,
           "is_transactional": true,
+          "next_send_retry_at_millis": null,
           "notification_category_id": "<stripped UUID>",
           "rendered_at_millis": <stripped field 'rendered_at_millis'>,
           "scheduled_at_millis": <stripped field 'scheduled_at_millis'>,
+          "send_attempt_errors": null,
           "simple_status": "ok",
           "skip_deliverability_check": false,
           "started_rendering_at_millis": <stripped field 'started_rendering_at_millis'>,
@@ -1846,3 +1853,330 @@ describe("email outbox pagination", () => {
   }, 60_000);
 });
 
+// Invalid SMTP config - causes HOST_NOT_FOUND (non-retryable error)
+const brokenSmtpConfig = {
+  type: "standard",
+  host: "this-host-does-not-exist.invalid",
+  port: 25,
+  username: "test",
+  password: "test",
+  sender_name: "Test Project",
+  sender_email: "test@example.com",
+} as const;
+
+// SMTP server that responds with 450 (temporary failure) - retryable and fast
+let tempFailServer: net.Server | null = null;
+let tempFailPort: number | null = null;
+
+async function startTempFailSmtpServer(): Promise<number> {
+  if (tempFailServer) {
+    return tempFailPort!;
+  }
+
+  return await new Promise((resolve, reject) => {
+    tempFailServer = net.createServer((socket) => {
+      // Send SMTP greeting
+      socket.write('220 localhost SMTP Test Server\r\n');
+
+      socket.on('data', (data) => {
+        const command = data.toString().trim().toUpperCase();
+        // Respond with 450 (temporary failure) to all commands
+        // This is a retryable error that happens immediately
+        if (command.startsWith('EHLO') || command.startsWith('HELO')) {
+          socket.write('250 localhost Hello\r\n');
+        } else if (command.startsWith('MAIL FROM')) {
+          // Temporary failure - "mailbox unavailable, try again later"
+          socket.write('450 Requested mail action not taken: mailbox unavailable (test)\r\n');
+          socket.end();
+        } else if (command.startsWith('QUIT')) {
+          socket.write('221 Bye\r\n');
+          socket.end();
+        } else {
+          socket.write('450 Temporary failure (test)\r\n');
+        }
+      });
+
+      socket.on('error', () => {
+        // Ignore errors (client may disconnect)
+      });
+    });
+
+    tempFailServer.listen(0, '127.0.0.1', () => {
+      const address = tempFailServer!.address();
+      if (typeof address === 'object' && address !== null) {
+        tempFailPort = address.port;
+        resolve(tempFailPort);
+      } else {
+        reject(new Error('Failed to get server address'));
+      }
+    });
+
+    tempFailServer.on('error', reject);
+  });
+}
+
+function stopTempFailSmtpServer(): void {
+  if (tempFailServer) {
+    tempFailServer.close();
+    tempFailServer = null;
+    tempFailPort = null;
+  }
+}
+
+// Factory function to create temp-fail SMTP config with dynamic port
+function createTempFailSmtpConfig(port: number) {
+  return {
+    type: "standard",
+    host: "127.0.0.1",
+    port: port,
+    username: "test",
+    password: "test",
+    sender_name: "Test Project",
+    sender_email: "test@example.com",
+  } as const;
+}
+
+// Helper type for send attempt error entries
+type SendAttemptErrorEntry = {
+  attempt_number: number,
+  timestamp: string,
+  external_message: string,
+  external_details: Record<string, unknown>,
+  internal_message: string,
+  internal_details: Record<string, unknown>,
+};
+
+// Helper type for email outbox items with retry fields
+type OutboxEmailWithRetryFields = OutboxEmail & {
+  failed_send_attempt_count: number,
+  next_send_retry_at_millis: number | null,
+  send_attempt_errors: SendAttemptErrorEntry[] | null,
+};
+
+// Helper to get detailed email from the outbox
+async function getOutboxEmailById(emailId: string): Promise<OutboxEmailWithRetryFields> {
+  const response = await niceBackendFetch(`/api/v1/emails/outbox/${emailId}`, {
+    method: "GET",
+    accessType: "server",
+  });
+  return response.body;
+}
+
+// Helper to poll until an email with the given subject appears in the outbox
+async function waitForOutboxEmail(subject: string, timeoutMs = 30000): Promise<OutboxEmailWithRetryFields> {
+  const startTime = Date.now();
+  while (Date.now() - startTime < timeoutMs) {
+    const emails = await getOutboxEmails({ subject });
+    if (emails.length > 0) {
+      return await getOutboxEmailById(emails[0].id);
+    }
+    await wait(500);
+  }
+  throw new StackAssertionError(
+    `Timeout waiting for email with subject "${subject}" to appear in outbox`,
+    { subject }
+  );
+}
+
+// Helper to poll until the email has reached a specific failed_send_attempt_count
+// Note: Status may be "queued" or "sending" due to race conditions - that's expected
+async function waitForAttemptCount(emailId: string, attemptCount: number, timeoutMs = 60000): Promise<OutboxEmailWithRetryFields> {
+  const startTime = Date.now();
+  while (Date.now() - startTime < timeoutMs) {
+    const email = await getOutboxEmailById(emailId);
+    if (email.failed_send_attempt_count >= attemptCount) {
+      return email;
+    }
+    // Terminal state - no more retries will happen
+    if (email.status === "server-error") {
+      return email;
+    }
+    await wait(500);
+  }
+  const finalEmail = await getOutboxEmailById(emailId);
+  throw new StackAssertionError(
+    `Timeout waiting for email ${emailId} to reach failed_send_attempt_count >= ${attemptCount}`,
+    { emailId, attemptCount, finalState: { count: finalEmail.failed_send_attempt_count, status: finalEmail.status } }
+  );
+}
+
+describe("email queue deferred retry logic", () => {
+  it("should immediately mark non-retryable errors as server-error without retrying", async ({ expect }) => {
+    // brokenSmtpConfig causes HOST_NOT_FOUND which has canRetry: false
+    await Project.createAndSwitch({
+      display_name: "Test Non-Retryable Error Project",
+      config: {
+        email_config: brokenSmtpConfig,
+      },
+    });
+
+    const mailbox = backendContext.value.mailbox;
+    const createUserResponse = await niceBackendFetch("/api/v1/users", {
+      method: "POST",
+      accessType: "server",
+      body: {
+        primary_email: mailbox.emailAddress,
+        primary_email_verified: true,
+      },
+    });
+    expect(createUserResponse.status).toBe(201);
+    const userId = createUserResponse.body.id;
+
+    const sendResponse = await niceBackendFetch("/api/v1/emails/send-email", {
+      method: "POST",
+      accessType: "server",
+      body: {
+        user_ids: [userId],
+        html: "Test email for non-retryable error",
+        subject: "Non-Retryable Error Test",
+      },
+    });
+    expect(sendResponse.status).toBe(200);
+
+    // Wait for the email to appear in the outbox and reach server-error state
+    const initialEmail = await waitForOutboxEmail("Non-Retryable Error Test");
+    const emailId = initialEmail.id;
+
+    // Wait for the email to reach server-error status
+    const maxWaitMs = 30000;
+    const startTime = Date.now();
+    let email = initialEmail;
+    while (Date.now() - startTime < maxWaitMs && email.status !== "server-error") {
+      await wait(500);
+      email = await getOutboxEmailById(emailId);
+    }
+
+    // Non-retryable errors should go directly to server-error
+    expect(email.failed_send_attempt_count).toBe(1);
+    expect(email.next_send_retry_at_millis).toBeNull();
+    expect(email.send_attempt_errors).not.toBeNull();
+    expect(email.send_attempt_errors?.length).toBe(1);
+    expect(email.status).toBe("server-error");
+
+    logIfTestFails("Email after non-retryable error", email);
+  });
+
+  describe("retryable errors (using temp-fail SMTP server)", () => {
+    // These tests use a local SMTP server that responds with 450 (temporary failure).
+    // This is fast (immediate response) and produces retryable errors.
+
+    let tempFailSmtpConfig: ReturnType<typeof createTempFailSmtpConfig>;
+
+    beforeAll(async () => {
+      const port = await startTempFailSmtpServer();
+      tempFailSmtpConfig = createTempFailSmtpConfig(port);
+    });
+
+    afterAll(() => {
+      stopTempFailSmtpServer();
+    });
+
+    it("should schedule retry on retryable failure and release email for next iteration", { timeout: 60000 }, async ({ expect }) => {
+      await Project.createAndSwitch({
+        display_name: "Test Deferred Retry Project",
+        config: {
+          email_config: tempFailSmtpConfig,
+        },
+      });
+
+      const mailbox = backendContext.value.mailbox;
+      const createUserResponse = await niceBackendFetch("/api/v1/users", {
+        method: "POST",
+        accessType: "server",
+        body: {
+          primary_email: mailbox.emailAddress,
+          primary_email_verified: true,
+        },
+      });
+      expect(createUserResponse.status).toBe(201);
+      const userId = createUserResponse.body.id;
+
+      const sendResponse = await niceBackendFetch("/api/v1/emails/send-email", {
+        method: "POST",
+        accessType: "server",
+        body: {
+          user_ids: [userId],
+          html: "Test email for retry logic",
+          subject: "Retry Test Email",
+        },
+      });
+      expect(sendResponse.status).toBe(200);
+
+      // Wait for the email to appear in the outbox
+      const initialEmail = await waitForOutboxEmail("Retry Test Email");
+      const emailId = initialEmail.id;
+
+      // Wait for first send attempt to complete (450 response is immediate)
+      const emailAfterFirstAttempt = await waitForAttemptCount(emailId, 1, 30000);
+
+      // Verify the email was released for a DIFFERENT queue iteration to pick up
+      // - status should NOT be server-error (retries remaining)
+      expect(emailAfterFirstAttempt.failed_send_attempt_count).toBe(1);
+      expect(emailAfterFirstAttempt.send_attempt_errors).not.toBeNull();
+      expect(emailAfterFirstAttempt.send_attempt_errors?.length).toBe(1);
+      expect(emailAfterFirstAttempt.send_attempt_errors?.[0].attempt_number).toBe(1);
+      expect(emailAfterFirstAttempt.send_attempt_errors?.[0].external_message).toContain("450");
+      expect(emailAfterFirstAttempt.status).not.toBe("server-error");
+
+      // Status should be "scheduled" (isQueued=false, waiting for nextSendRetryAt) or "sending" (next iteration already picked it up)
+      expect(["scheduled", "sending"]).toContain(emailAfterFirstAttempt.status);
+
+      logIfTestFails("Email after first retry attempt", emailAfterFirstAttempt);
+    });
+
+    it("should retry emails until max attempts exhausted, then mark as server-error", { timeout: 90000 }, async ({ expect }) => {
+      await Project.createAndSwitch({
+        display_name: "Test Retry Exhaustion Project",
+        config: {
+          email_config: tempFailSmtpConfig,
+        },
+      });
+
+      const mailbox = backendContext.value.mailbox;
+      const createUserResponse = await niceBackendFetch("/api/v1/users", {
+        method: "POST",
+        accessType: "server",
+        body: {
+          primary_email: mailbox.emailAddress,
+          primary_email_verified: true,
+        },
+      });
+      expect(createUserResponse.status).toBe(201);
+      const userId = createUserResponse.body.id;
+
+      const sendResponse = await niceBackendFetch("/api/v1/emails/send-email", {
+        method: "POST",
+        accessType: "server",
+        body: {
+          user_ids: [userId],
+          html: "Test email for retry exhaustion",
+          subject: "Retry Exhaustion Test",
+        },
+      });
+      expect(sendResponse.status).toBe(200);
+
+      // Wait for the email to appear in the outbox
+      const initialEmail = await waitForOutboxEmail("Retry Exhaustion Test");
+      const emailId = initialEmail.id;
+
+      // Wait for all retries to exhaust (MAX_SEND_ATTEMPTS = 3)
+      // With 450 errors (immediate) + exponential backoff, this should complete in ~30s
+      const maxWaitMs = 60000;
+      const startTime = Date.now();
+      let email = await getOutboxEmailById(emailId);
+      while (Date.now() - startTime < maxWaitMs && email.status !== "server-error") {
+        await wait(1000);
+        email = await getOutboxEmailById(emailId);
+      }
+
+      expect(email.status).toBe("server-error");
+      expect(email.failed_send_attempt_count).toBe(3); // MAX_SEND_ATTEMPTS
+      expect(email.send_attempt_errors?.length).toBe(3);
+      // No more retries scheduled
+      expect(email.next_send_retry_at_millis).toBeNull();
+
+      logIfTestFails("Email after all retries exhausted", email);
+    });
+  });
+});
+

From a9bebf222031b6c8dfabe603fa86fb9a2e3f2475 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Wed, 11 Feb 2026 19:51:28 -0800
Subject: [PATCH 05/12] fix: skipped, cancelled emails cant be retried now

---
 apps/backend/src/app/api/latest/emails/outbox/crud.tsx | 1 +
 apps/backend/src/lib/email-queue-step.tsx              | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/apps/backend/src/app/api/latest/emails/outbox/crud.tsx b/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
index 42d76eec31..3b61c65a5d 100644
--- a/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
+++ b/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
@@ -381,6 +381,7 @@ export const emailOutboxCrudHandlers = createLazyProxy(() => createCrudHandlers(
       // Cancel action - mark as skipped
       set("isPaused", Prisma.sql`false`);
       set("isQueued", Prisma.sql`false`);
+      setNull("nextSendRetryAt"); // Clear any pending retry so it won't be picked up
       set("skippedReason", Prisma.sql`'MANUALLY_CANCELLED'::"EmailOutboxSkippedReason"`);
       set("skippedDetails", Prisma.sql`'{}'::jsonb`);
     } else {
diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index e82b22d7a9..01a8b04951 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -523,6 +523,8 @@ async function prepareSendPlan(deltaSeconds: number): Promise<TenancySendBatch[]
   const tenancyIds = await globalPrismaClient.emailOutbox.findMany({
     where: {
       isPaused: false,
+      skippedReason: null, // Don't process skipped/cancelled emails
+      finishedSendingAt: null, // Don't process already-finished emails (defense in depth)
       OR: [
         // Normal case: queued, not started, and no pending retry
         { startedSendingAt: null, isQueued: true, nextSendRetryAt: null },
@@ -560,6 +562,8 @@ async function claimEmailsForSending(tx: PrismaClientTransaction, tenancyId: str
       FROM "EmailOutbox"
       WHERE "tenancyId" = ${tenancyId}::uuid
         AND "isPaused" = FALSE
+        AND "skippedReason" IS NULL  -- Don't process skipped/cancelled emails
+        AND "finishedSendingAt" IS NULL  -- Don't process already-finished emails (defense in depth)
         AND "finishedRenderingAt" IS NOT NULL
         AND (
           -- Normal case: queued, not started, and no pending retry

From 9b3c545f2896620ff8cb7df02775aa23a83593fb Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Wed, 11 Feb 2026 20:34:36 -0800
Subject: [PATCH 06/12] chore: rename failedSendAttempts and backoff number

---
 .../migration.sql                             |  8 +++----
 apps/backend/prisma/schema.prisma             | 12 +++++-----
 .../src/app/api/latest/emails/outbox/crud.tsx |  8 +++----
 apps/backend/src/lib/email-queue-step.tsx     | 12 +++++-----
 .../api/v1/emails/email-queue.test.ts         | 24 +++++++++----------
 .../api/v1/emails/outbox-api.test.ts          |  3 +++
 .../src/interface/crud/email-outbox.ts        |  2 +-
 .../apps/implementations/admin-app-impl.ts    |  2 +-
 .../template/src/lib/stack-app/email/index.ts |  2 +-
 9 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql b/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
index b8f3adc729..46243b8b79 100644
--- a/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
+++ b/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
@@ -3,20 +3,20 @@
 -- instead of blocking the current iteration with inline retries.
 
 ALTER TABLE "EmailOutbox"
-  ADD COLUMN "failedSendAttemptCount" INTEGER NOT NULL DEFAULT 0,
+  ADD COLUMN "sendRetries" INTEGER NOT NULL DEFAULT 0,
   ADD COLUMN "nextSendRetryAt" TIMESTAMP(3),
   ADD COLUMN "sendAttemptErrors" JSONB;
 
 -- Constraint: nextSendRetryAt can only be set after at least one failed attempt
--- (if failedSendAttemptCount is 0, no attempt has failed, so there's nothing to retry)
+-- (if sendRetries is 0, no attempt has failed, so there's nothing to retry)
 ALTER TABLE "EmailOutbox"
   ADD CONSTRAINT "EmailOutbox_nextSendRetryAt_requires_failure"
-  CHECK ("nextSendRetryAt" IS NULL OR "failedSendAttemptCount" > 0);
+  CHECK ("nextSendRetryAt" IS NULL OR "sendRetries" > 0);
 
 -- Constraint: sendAttemptErrors can only be set after at least one failed attempt
 ALTER TABLE "EmailOutbox"
   ADD CONSTRAINT "EmailOutbox_sendAttemptErrors_requires_failure"
-  CHECK ("sendAttemptErrors" IS NULL OR "failedSendAttemptCount" > 0);
+  CHECK ("sendAttemptErrors" IS NULL OR "sendRetries" > 0);
 
 -- Constraint: nextSendRetryAt must be null when email has finished sending
 -- (if finishedSendingAt is set, there's nothing more to retry)
diff --git a/apps/backend/prisma/schema.prisma b/apps/backend/prisma/schema.prisma
index 7712ed8713..6d608df609 100644
--- a/apps/backend/prisma/schema.prisma
+++ b/apps/backend/prisma/schema.prisma
@@ -845,12 +845,12 @@ model EmailOutbox {
   finishedSendingAt DateTime?
 
   // Deferred retry fields for email sending
-  // Number of failed send attempts (starts at 0, incremented on each failure). Reset when email content is edited.
-  failedSendAttemptCount Int       @default(0)
-  // When to retry sending (null = not waiting for retry). Set when a retryable error occurs. Reset when email content is edited. Must be null if failedSendAttemptCount is 0 (enforced by EmailOutbox_nextSendRetryAt_requires_failure). Must be null when finishedSendingAt is set (enforced by EmailOutbox_no_retry_after_finished).
-  nextSendRetryAt        DateTime?
-  // JSON array of errors from each failed send attempt. Each entry has: { attemptNumber, timestamp, externalMessage, externalDetails, internalMessage, internalDetails }. Reset when email content is edited. Must be null if failedSendAttemptCount is 0 (enforced by EmailOutbox_sendAttemptErrors_requires_failure).
-  sendAttemptErrors      Json?
+  // Number of send retries attempted (starts at 0, incremented on each failure). Reset when email content is edited.
+  sendRetries       Int       @default(0)
+  // When to retry sending (null = not waiting for retry). Set when a retryable error occurs. Reset when email content is edited. Must be null if sendRetries is 0 (enforced by EmailOutbox_nextSendRetryAt_requires_failure). Must be null when finishedSendingAt is set (enforced by EmailOutbox_no_retry_after_finished).
+  nextSendRetryAt   DateTime?
+  // JSON array of errors from each failed send attempt. Each entry has: { attemptNumber, timestamp, externalMessage, externalDetails, internalMessage, internalDetails }. Reset when email content is edited. Must be null if sendRetries is 0 (enforced by EmailOutbox_sendAttemptErrors_requires_failure).
+  sendAttemptErrors Json?
 
   // A generated column that is equal to finishedSendingAt if canHaveDeliveryInfo is false, otherwise deliveredAt.
   sentAt DateTime? @default(dbgenerated("\nCASE\n    WHEN (\"canHaveDeliveryInfo\" IS TRUE) THEN \"deliveredAt\"\n    WHEN (\"canHaveDeliveryInfo\" IS FALSE) THEN \"finishedSendingAt\"\n    ELSE NULL::timestamp without time zone\nEND"))
diff --git a/apps/backend/src/app/api/latest/emails/outbox/crud.tsx b/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
index 3b61c65a5d..6ecf5ad789 100644
--- a/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
+++ b/apps/backend/src/app/api/latest/emails/outbox/crud.tsx
@@ -88,7 +88,7 @@ function prismaModelToCrud(prismaModel: EmailOutbox): EmailOutboxCrud["Server"][
     variables: (prismaModel.extraRenderVariables ?? {}) as Record<string, any>,
     skip_deliverability_check: prismaModel.shouldSkipDeliverabilityCheck,
     scheduled_at_millis: prismaModel.scheduledAt.getTime(),
-    failed_send_attempt_count: prismaModel.failedSendAttemptCount,
+    send_retries: prismaModel.sendRetries,
     next_send_retry_at_millis: prismaModel.nextSendRetryAt?.getTime() ?? null,
     send_attempt_errors: sendAttemptErrors,
     // Default flags (overridden in specific statuses)
@@ -419,8 +419,8 @@ export const emailOutboxCrudHandlers = createLazyProxy(() => createCrudHandlers(
       // If content changed, reset rendering and sending state
       if (needsRerenderReset) {
         set("isQueued", Prisma.sql`false`);
-        // Reset retry fields (failedSendAttemptCount to 0, others to null)
-        set("failedSendAttemptCount", Prisma.sql`0`);
+        // Reset retry fields (sendRetries to 0, others to null)
+        set("sendRetries", Prisma.sql`0`);
         setNull(
           "renderedByWorkerId", "startedRenderingAt", "finishedRenderingAt",
           "renderErrorExternalMessage", "renderErrorExternalDetails",
@@ -521,7 +521,7 @@ function parseEmailOutboxFromJson(j: Record<string, unknown>): EmailOutbox {
     scheduledAtIfNotYetQueued: dateOrNull("scheduledAtIfNotYetQueued"),
     startedSendingAt: dateOrNull("startedSendingAt"),
     finishedSendingAt: dateOrNull("finishedSendingAt"),
-    failedSendAttemptCount: j.failedSendAttemptCount as number,
+    sendRetries: j.sendRetries as number,
     nextSendRetryAt: dateOrNull("nextSendRetryAt"),
     sendAttemptErrors: j.sendAttemptErrors as Prisma.JsonValue,
     sentAt: dateOrNull("sentAt"),
diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index 01a8b04951..cb84901be0 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -21,10 +21,10 @@ const MAX_RENDER_BATCH = 50;
 
 const MAX_SEND_ATTEMPTS = 3;
 
-const RETRY_BACKOFF_BASE_MS = 2000;
+const SEND_RETRY_BACKOFF_BASE_MS = 2000;
 
 const calculateRetryBackoffMs = (attemptCount: number): number => {
-  return (Math.random() + 0.5) * RETRY_BACKOFF_BASE_MS * Math.pow(2, attemptCount);
+  return (Math.random() + 0.5) * SEND_RETRY_BACKOFF_BASE_MS * Math.pow(2, attemptCount);
 };
 
 /**
@@ -697,7 +697,7 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
       });
 
     if (result.status === "error") {
-      const newAttemptCount = row.failedSendAttemptCount + 1;
+      const newAttemptCount = row.sendRetries + 1;
       const canRetry = result.error.canRetry && newAttemptCount < MAX_SEND_ATTEMPTS;
 
       // Build error entry for this attempt
@@ -725,7 +725,7 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
           data: {
             startedSendingAt: null, // Unclaim the email
             isQueued: false, // Prevent normal queue path from picking it up
-            failedSendAttemptCount: newAttemptCount,
+            sendRetries: newAttemptCount,
             nextSendRetryAt: new Date(Date.now() + backoffMs),
             sendAttemptErrors: updatedErrors as Prisma.InputJsonArray,
           },
@@ -760,7 +760,7 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
           data: {
             finishedSendingAt: new Date(),
             canHaveDeliveryInfo: false,
-            failedSendAttemptCount: newAttemptCount,
+            sendRetries: newAttemptCount,
             sendAttemptErrors: updatedErrors as Prisma.InputJsonArray,
             sendServerErrorExternalMessage: externalMessage,
             sendServerErrorExternalDetails: { errorType: result.error.errorType },
@@ -776,7 +776,7 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
         });
       }
     } else {
-      // Success - mark as sent (don't increment failedSendAttemptCount since this wasn't a failure)
+      // Success - mark as sent (don't increment sendRetries since this wasn't a failure)
       await globalPrismaClient.emailOutbox.update({
         where: {
           tenancyId_id: {
diff --git a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
index adf1d8f2fa..aa92f66541 100644
--- a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
+++ b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
@@ -413,7 +413,6 @@ describe("send email to all users", () => {
           "can_have_delivery_info": false,
           "created_at_millis": <stripped field 'created_at_millis'>,
           "delivered_at_millis": <stripped field 'delivered_at_millis'>,
-          "failed_send_attempt_count": 0,
           "has_delivered": true,
           "has_rendered": true,
           "html": "<!DOCTYPE html PUBLIC \\"-//W3C//DTD XHTML 1.0 Transitional//EN\\" \\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\\"><html dir=\\"ltr\\" lang=\\"en\\"><head><meta content=\\"text/html; charset=UTF-8\\" http-equiv=\\"Content-Type\\"/><meta name=\\"x-apple-disable-message-reformatting\\"/></head><body style=\\"background-color:rgb(250,251,251)\\"><!--$--><table border=\\"0\\" width=\\"100%\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" align=\\"center\\"><tbody><tr><td style=\\"background-color:rgb(250,251,251);font-family:ui-sans-serif,system-ui,sans-serif,&quot;Apple Color Emoji&quot;,&quot;Segoe UI Emoji&quot;,&quot;Segoe UI Symbol&quot;,&quot;Noto Color Emoji&quot;;font-size:1rem;line-height:1.5\\"><table align=\\"center\\" width=\\"100%\\" border=\\"0\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" style=\\"max-width:37.5em;background-color:rgb(255,255,255);padding:45px;border-radius:0.5rem\\"><tbody><tr style=\\"width:100%\\"><td><div><p>All users test</p></div></td></tr></tbody></table></td></tr></tbody></table><!--7--><!--/$--></body></html>",
@@ -426,6 +425,7 @@ describe("send email to all users", () => {
           "rendered_at_millis": <stripped field 'rendered_at_millis'>,
           "scheduled_at_millis": <stripped field 'scheduled_at_millis'>,
           "send_attempt_errors": null,
+          "send_retries": 0,
           "simple_status": "ok",
           "skip_deliverability_check": false,
           "started_rendering_at_millis": <stripped field 'started_rendering_at_millis'>,
@@ -453,7 +453,6 @@ describe("send email to all users", () => {
           "can_have_delivery_info": false,
           "created_at_millis": <stripped field 'created_at_millis'>,
           "delivered_at_millis": <stripped field 'delivered_at_millis'>,
-          "failed_send_attempt_count": 0,
           "has_delivered": true,
           "has_rendered": true,
           "html": "<!DOCTYPE html PUBLIC \\"-//W3C//DTD XHTML 1.0 Transitional//EN\\" \\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\\"><html dir=\\"ltr\\" lang=\\"en\\"><head><meta content=\\"text/html; charset=UTF-8\\" http-equiv=\\"Content-Type\\"/><meta name=\\"x-apple-disable-message-reformatting\\"/></head><body style=\\"background-color:rgb(250,251,251)\\"><!--$--><table border=\\"0\\" width=\\"100%\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" align=\\"center\\"><tbody><tr><td style=\\"background-color:rgb(250,251,251);font-family:ui-sans-serif,system-ui,sans-serif,&quot;Apple Color Emoji&quot;,&quot;Segoe UI Emoji&quot;,&quot;Segoe UI Symbol&quot;,&quot;Noto Color Emoji&quot;;font-size:1rem;line-height:1.5\\"><table align=\\"center\\" width=\\"100%\\" border=\\"0\\" cellPadding=\\"0\\" cellSpacing=\\"0\\" role=\\"presentation\\" style=\\"max-width:37.5em;background-color:rgb(255,255,255);padding:45px;border-radius:0.5rem\\"><tbody><tr style=\\"width:100%\\"><td><div><p>All users test</p></div></td></tr></tbody></table></td></tr></tbody></table><!--7--><!--/$--></body></html>",
@@ -466,6 +465,7 @@ describe("send email to all users", () => {
           "rendered_at_millis": <stripped field 'rendered_at_millis'>,
           "scheduled_at_millis": <stripped field 'scheduled_at_millis'>,
           "send_attempt_errors": null,
+          "send_retries": 0,
           "simple_status": "ok",
           "skip_deliverability_check": false,
           "started_rendering_at_millis": <stripped field 'started_rendering_at_millis'>,
@@ -1948,7 +1948,7 @@ type SendAttemptErrorEntry = {
 
 // Helper type for email outbox items with retry fields
 type OutboxEmailWithRetryFields = OutboxEmail & {
-  failed_send_attempt_count: number,
+  send_retries: number,
   next_send_retry_at_millis: number | null,
   send_attempt_errors: SendAttemptErrorEntry[] | null,
 };
@@ -1978,13 +1978,13 @@ async function waitForOutboxEmail(subject: string, timeoutMs = 30000): Promise<O
   );
 }
 
-// Helper to poll until the email has reached a specific failed_send_attempt_count
+// Helper to poll until the email has reached a specific send_retries
 // Note: Status may be "queued" or "sending" due to race conditions - that's expected
 async function waitForAttemptCount(emailId: string, attemptCount: number, timeoutMs = 60000): Promise<OutboxEmailWithRetryFields> {
   const startTime = Date.now();
   while (Date.now() - startTime < timeoutMs) {
     const email = await getOutboxEmailById(emailId);
-    if (email.failed_send_attempt_count >= attemptCount) {
+    if (email.send_retries >= attemptCount) {
       return email;
     }
     // Terminal state - no more retries will happen
@@ -1995,8 +1995,8 @@ async function waitForAttemptCount(emailId: string, attemptCount: number, timeou
   }
   const finalEmail = await getOutboxEmailById(emailId);
   throw new StackAssertionError(
-    `Timeout waiting for email ${emailId} to reach failed_send_attempt_count >= ${attemptCount}`,
-    { emailId, attemptCount, finalState: { count: finalEmail.failed_send_attempt_count, status: finalEmail.status } }
+    `Timeout waiting for email ${emailId} to reach send_retries >= ${attemptCount}`,
+    { emailId, attemptCount, finalState: { count: finalEmail.send_retries, status: finalEmail.status } }
   );
 }
 
@@ -2047,7 +2047,7 @@ describe("email queue deferred retry logic", () => {
     }
 
     // Non-retryable errors should go directly to server-error
-    expect(email.failed_send_attempt_count).toBe(1);
+    expect(email.send_retries).toBe(1);
     expect(email.next_send_retry_at_millis).toBeNull();
     expect(email.send_attempt_errors).not.toBeNull();
     expect(email.send_attempt_errors?.length).toBe(1);
@@ -2111,7 +2111,7 @@ describe("email queue deferred retry logic", () => {
 
       // Verify the email was released for a DIFFERENT queue iteration to pick up
       // - status should NOT be server-error (retries remaining)
-      expect(emailAfterFirstAttempt.failed_send_attempt_count).toBe(1);
+      expect(emailAfterFirstAttempt.send_retries).toBe(1);
       expect(emailAfterFirstAttempt.send_attempt_errors).not.toBeNull();
       expect(emailAfterFirstAttempt.send_attempt_errors?.length).toBe(1);
       expect(emailAfterFirstAttempt.send_attempt_errors?.[0].attempt_number).toBe(1);
@@ -2159,7 +2159,7 @@ describe("email queue deferred retry logic", () => {
       const initialEmail = await waitForOutboxEmail("Retry Exhaustion Test");
       const emailId = initialEmail.id;
 
-      // Wait for all retries to exhaust (MAX_SEND_ATTEMPTS = 3)
+      // Wait for all retries to exhaust (MAX_SEND_ATTEMPTS = 5)
       // With 450 errors (immediate) + exponential backoff, this should complete in ~30s
       const maxWaitMs = 60000;
       const startTime = Date.now();
@@ -2170,8 +2170,8 @@ describe("email queue deferred retry logic", () => {
       }
 
       expect(email.status).toBe("server-error");
-      expect(email.failed_send_attempt_count).toBe(3); // MAX_SEND_ATTEMPTS
-      expect(email.send_attempt_errors?.length).toBe(3);
+      expect(email.send_retries).toBe(5); // MAX_SEND_ATTEMPTS
+      expect(email.send_attempt_errors?.length).toBe(5);
       // No more retries scheduled
       expect(email.next_send_retry_at_millis).toBeNull();
 
diff --git a/apps/e2e/tests/backend/endpoints/api/v1/emails/outbox-api.test.ts b/apps/e2e/tests/backend/endpoints/api/v1/emails/outbox-api.test.ts
index ff31fa42db..503086930a 100644
--- a/apps/e2e/tests/backend/endpoints/api/v1/emails/outbox-api.test.ts
+++ b/apps/e2e/tests/backend/endpoints/api/v1/emails/outbox-api.test.ts
@@ -630,7 +630,10 @@ describe("email outbox API", () => {
                 "has_rendered": false,
                 "id": "<stripped UUID>",
                 "is_paused": true,
+                "next_send_retry_at_millis": null,
                 "scheduled_at_millis": <stripped field 'scheduled_at_millis'>,
+                "send_attempt_errors": null,
+                "send_retries": 0,
                 "simple_status": "in-progress",
                 "skip_deliverability_check": false,
                 "status": "paused",
diff --git a/packages/stack-shared/src/interface/crud/email-outbox.ts b/packages/stack-shared/src/interface/crud/email-outbox.ts
index ddc785bd83..9759289cd0 100644
--- a/packages/stack-shared/src/interface/crud/email-outbox.ts
+++ b/packages/stack-shared/src/interface/crud/email-outbox.ts
@@ -41,7 +41,7 @@ const emailOutboxBaseSchema = fieldSchema.yupObject({
   scheduled_at_millis: fieldSchema.yupNumber().defined(),
 
   // Retry-related fields (for debugging/testing deferred retry logic)
-  failed_send_attempt_count: fieldSchema.yupNumber().defined(),
+  send_retries: fieldSchema.yupNumber().defined(),
   next_send_retry_at_millis: fieldSchema.yupNumber().nullable().defined(),
   // Array of errors from each failed send attempt, each with internal/external messages
   send_attempt_errors: fieldSchema.yupArray(fieldSchema.yupObject({
diff --git a/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts b/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts
index 9dd25fe9a4..d7b0d41a93 100644
--- a/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts
+++ b/packages/template/src/lib/stack-app/apps/implementations/admin-app-impl.ts
@@ -734,7 +734,7 @@ export class _StackAdminAppImplIncomplete<HasTokenStore extends boolean, Project
       hasRendered: false as const,
       hasDelivered: false as const,
       // Retry tracking fields
-      failedSendAttemptCount: crud.failed_send_attempt_count as number,
+      sendRetries: crud.send_retries as number,
       nextSendRetryAt: crud.next_send_retry_at_millis ? new Date(crud.next_send_retry_at_millis) : null,
       sendAttemptErrors: crud.send_attempt_errors ? (crud.send_attempt_errors as Array<{
         attempt_number: number,
diff --git a/packages/template/src/lib/stack-app/email/index.ts b/packages/template/src/lib/stack-app/email/index.ts
index c3cb3e8d51..e33ea61166 100644
--- a/packages/template/src/lib/stack-app/email/index.ts
+++ b/packages/template/src/lib/stack-app/email/index.ts
@@ -59,7 +59,7 @@ type AdminEmailOutboxBase = {
   hasRendered: false,
   hasDelivered: false,
   // Retry tracking fields
-  failedSendAttemptCount: number,
+  sendRetries: number,
   nextSendRetryAt: Date | null,
   sendAttemptErrors: AdminSendAttemptError[] | null,
 };

From 4567fb7dde00c79d46e4f29b6497fb9711d598b3 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Thu, 12 Feb 2026 10:26:34 -0800
Subject: [PATCH 07/12] chore: bump number of retries

---
 apps/backend/src/lib/email-queue-step.tsx     |  2 +-
 apps/e2e/tests/backend/backend-helpers.ts     |  2 +-
 .../api/v1/emails/email-queue.test.ts         | 21 +++++++++++++------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index cb84901be0..4d2118a778 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -19,7 +19,7 @@ import { lowLevelSendEmailDirectWithoutRetries } from "./emails-low-level";
 
 const MAX_RENDER_BATCH = 50;
 
-const MAX_SEND_ATTEMPTS = 3;
+const MAX_SEND_ATTEMPTS = 5;
 
 const SEND_RETRY_BACKOFF_BASE_MS = 2000;
 
diff --git a/apps/e2e/tests/backend/backend-helpers.ts b/apps/e2e/tests/backend/backend-helpers.ts
index fc5d260dc5..cf87bad045 100644
--- a/apps/e2e/tests/backend/backend-helpers.ts
+++ b/apps/e2e/tests/backend/backend-helpers.ts
@@ -187,7 +187,7 @@ export async function bumpEmailAddress(options: { unindexed?: boolean } = {}) {
 }
 
 // Type for outbox email items (simplified - full type is EmailOutboxCrud["Server"]["Read"])
-type OutboxEmail = {
+export type OutboxEmail = {
   id: string,
   subject?: string,
   status: string,
diff --git a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
index aa92f66541..24d8d4aabf 100644
--- a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
+++ b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
@@ -1,3 +1,4 @@
+import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
 import { wait } from "@stackframe/stack-shared/dist/utils/promises";
 import { deindent, nicify } from "@stackframe/stack-shared/dist/utils/strings";
 import beautify from "js-beautify";
@@ -5,7 +6,7 @@ import * as net from "net";
 import { afterAll, beforeAll, describe } from "vitest";
 import { it, logIfTestFails } from "../../../../../helpers";
 import { withPortPrefix } from "../../../../../helpers/ports";
-import { Auth, Project, User, backendContext, bumpEmailAddress, getOutboxEmails, niceBackendFetch, waitForOutboxEmailWithStatus } from "../../../../backend-helpers";
+import { Auth, InternalApiKey, OutboxEmail, Project, User, backendContext, bumpEmailAddress, getOutboxEmails, niceBackendFetch, waitForOutboxEmailWithStatus } from "../../../../backend-helpers";
 
 const testEmailConfig = {
   type: "standard",
@@ -1959,6 +1960,9 @@ async function getOutboxEmailById(emailId: string): Promise<OutboxEmailWithRetry
     method: "GET",
     accessType: "server",
   });
+  if (response.status !== 200) {
+    throw new StackAssertionError(`Failed to get email ${emailId}: status ${response.status}`, { response });
+  }
   return response.body;
 }
 
@@ -2078,6 +2082,8 @@ describe("email queue deferred retry logic", () => {
           email_config: tempFailSmtpConfig,
         },
       });
+      // Create API keys that don't expire (JWT admin tokens expire in 60s which is too short for retry tests)
+      await InternalApiKey.createAndSetProjectKeys();
 
       const mailbox = backendContext.value.mailbox;
       const createUserResponse = await niceBackendFetch("/api/v1/users", {
@@ -2124,13 +2130,15 @@ describe("email queue deferred retry logic", () => {
       logIfTestFails("Email after first retry attempt", emailAfterFirstAttempt);
     });
 
-    it("should retry emails until max attempts exhausted, then mark as server-error", { timeout: 90000 }, async ({ expect }) => {
+    it("should retry emails until max attempts exhausted, then mark as server-error", { timeout: 150000 }, async ({ expect }) => {
       await Project.createAndSwitch({
         display_name: "Test Retry Exhaustion Project",
         config: {
           email_config: tempFailSmtpConfig,
         },
       });
+      // Create API keys that don't expire (JWT admin tokens expire in 60s which is too short for retry tests)
+      await InternalApiKey.createAndSetProjectKeys();
 
       const mailbox = backendContext.value.mailbox;
       const createUserResponse = await niceBackendFetch("/api/v1/users", {
@@ -2160,8 +2168,8 @@ describe("email queue deferred retry logic", () => {
       const emailId = initialEmail.id;
 
       // Wait for all retries to exhaust (MAX_SEND_ATTEMPTS = 5)
-      // With 450 errors (immediate) + exponential backoff, this should complete in ~30s
-      const maxWaitMs = 60000;
+      // With 450 errors (immediate) + exponential backoff (2s base * 2^attempt), worst case ~90s
+      const maxWaitMs = 120000;
       const startTime = Date.now();
       let email = await getOutboxEmailById(emailId);
       while (Date.now() - startTime < maxWaitMs && email.status !== "server-error") {
@@ -2169,13 +2177,14 @@ describe("email queue deferred retry logic", () => {
         email = await getOutboxEmailById(emailId);
       }
 
+      // Log the email object to help debug undefined status issues
+      logIfTestFails("Email after retry exhaustion loop", email);
+
       expect(email.status).toBe("server-error");
       expect(email.send_retries).toBe(5); // MAX_SEND_ATTEMPTS
       expect(email.send_attempt_errors?.length).toBe(5);
       // No more retries scheduled
       expect(email.next_send_retry_at_millis).toBeNull();
-
-      logIfTestFails("Email after all retries exhausted", email);
     });
   });
 });

From e669c08fac481e2287a473bd2c32f3ccc45c6af8 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Thu, 12 Feb 2026 10:57:43 -0800
Subject: [PATCH 08/12] refactor: add more info to error handling

---
 apps/backend/src/lib/email-queue-step.tsx | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index 4d2118a778..0cc88eadae 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -698,7 +698,8 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
 
     if (result.status === "error") {
       const newAttemptCount = row.sendRetries + 1;
-      const canRetry = result.error.canRetry && newAttemptCount < MAX_SEND_ATTEMPTS;
+      const isAttemptsExhausted = result.error.canRetry && newAttemptCount >= MAX_SEND_ATTEMPTS;
+      const canRetry = result.error.canRetry && !isAttemptsExhausted;
 
       // Build error entry for this attempt
       const errorEntry: SendAttemptError = {
@@ -731,12 +732,12 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
           },
         });
       } else {
-        // Mark as permanent failure - distinguish between "attempts exhausted" and "permanent error from provider"
-        const isAttemptsExhausted = result.error.canRetry && newAttemptCount >= MAX_SEND_ATTEMPTS;
+        // Mark as permanent failure - either "attempts_exhausted" (retryable but hit limit) or "permanent_error" (non-retryable)
         const failureReason = isAttemptsExhausted ? "attempts_exhausted" : "permanent_error";
 
         if (isAttemptsExhausted) {
           captureError("email-queue-step-retries-exhausted", new StackAssertionError(`Email failed after ${newAttemptCount} attempts`, {
+            cause: result.error.rawError,
             emailId: row.id,
             tenancyId: row.tenancyId,
             errorType: result.error.errorType,
@@ -745,10 +746,6 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
           }));
         }
 
-        const externalMessage = isAttemptsExhausted
-          ? "Email could not be delivered after multiple attempts. Please verify your email configuration and try again."
-          : result.error.message;
-
         await globalPrismaClient.emailOutbox.update({
           where: {
             tenancyId_id: {
@@ -762,7 +759,7 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
             canHaveDeliveryInfo: false,
             sendRetries: newAttemptCount,
             sendAttemptErrors: updatedErrors as Prisma.InputJsonArray,
-            sendServerErrorExternalMessage: externalMessage,
+            sendServerErrorExternalMessage: result.error.message,
             sendServerErrorExternalDetails: { errorType: result.error.errorType },
             sendServerErrorInternalMessage: result.error.message,
             sendServerErrorInternalDetails: {

From 6ea82d7e4c1e3beee4481df30ffd4b552dee839f Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Thu, 12 Feb 2026 15:32:50 -0800
Subject: [PATCH 09/12] fix: retryable emails no longer skip the queuing logic

Before, retryable emails went from sending -> scheduled -> sending
Now, we move through the queuing logic.
This simplifies the state transition functions and lifts the retry checking logic to only one func
 queuing.
We also add some checks to queuing to have it be robust.
---
 apps/backend/src/lib/email-queue-step.tsx     | 56 ++++++++++---------
 .../api/v1/emails/email-queue.test.ts         | 29 +++++-----
 2 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index 0cc88eadae..3200770308 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -502,15 +502,30 @@ async function renderTenancyEmails(workerId: string, tenancyId: string, group: E
 }
 
 async function queueReadyEmails(): Promise<{ queuedCount: number }> {
+  // Queue emails that are ready to send:
+  // - Fresh emails: scheduledAt has passed and no retry pending
+  // - Retry emails: both scheduledAt AND nextSendRetryAt have passed
+  //
+  // We always require scheduledAt <= NOW() to respect the original scheduling intent.
+  // nextSendRetryAt should not bypass scheduledAt (e.g., if data is corrupted or manually edited).
+  //
+  // Clear nextSendRetryAt when queuing so the email is in a clean "queued" state.
   const res = await globalPrismaClient.$queryRaw<{ id: string }[]>`
     UPDATE "EmailOutbox"
-    SET "isQueued" = TRUE
+    SET "isQueued" = TRUE, "nextSendRetryAt" = NULL
     WHERE "isQueued" = FALSE
       AND "isPaused" = FALSE
+      AND "skippedReason" IS NULL
       AND "finishedRenderingAt" IS NOT NULL
       AND "renderedHtml" IS NOT NULL
       AND "scheduledAt" <= NOW()
-      AND "nextSendRetryAt" IS NULL
+      AND (
+        -- Fresh emails: no retry pending
+        "nextSendRetryAt" IS NULL
+        OR
+        -- Retry emails: retry time has passed
+        "nextSendRetryAt" <= NOW()
+      )
     RETURNING "id";
   `;
   return {
@@ -519,18 +534,14 @@ async function queueReadyEmails(): Promise<{ queuedCount: number }> {
 }
 
 async function prepareSendPlan(deltaSeconds: number): Promise<TenancySendBatch[]> {
-  // Find tenancies with emails ready to send (either new emails or emails ready for retry)
+  // Find tenancies with queued emails ready to send
   const tenancyIds = await globalPrismaClient.emailOutbox.findMany({
     where: {
       isPaused: false,
-      skippedReason: null, // Don't process skipped/cancelled emails
-      finishedSendingAt: null, // Don't process already-finished emails (defense in depth)
-      OR: [
-        // Normal case: queued, not started, and no pending retry
-        { startedSendingAt: null, isQueued: true, nextSendRetryAt: null },
-        // Retry case: past retry time
-        { nextSendRetryAt: { lte: new Date() } },
-      ],
+      skippedReason: null,
+      finishedSendingAt: null,
+      startedSendingAt: null,
+      isQueued: true,
     },
     distinct: ["tenancyId"],
     select: { tenancyId: true },
@@ -556,30 +567,25 @@ function stochasticQuota(value: number): number {
 }
 
 async function claimEmailsForSending(tx: PrismaClientTransaction, tenancyId: string, limit: number): Promise<EmailOutbox[]> {
+  // Claim queued emails for sending
+  // Note: queueReadyEmails() handles the time-based logic, so we just look for isQueued = TRUE
   return await tx.$queryRaw<EmailOutbox[]>(Prisma.sql`
     WITH selected AS (
       SELECT "tenancyId", "id"
       FROM "EmailOutbox"
       WHERE "tenancyId" = ${tenancyId}::uuid
         AND "isPaused" = FALSE
-        AND "skippedReason" IS NULL  -- Don't process skipped/cancelled emails
-        AND "finishedSendingAt" IS NULL  -- Don't process already-finished emails (defense in depth)
+        AND "skippedReason" IS NULL
+        AND "finishedSendingAt" IS NULL
         AND "finishedRenderingAt" IS NOT NULL
-        AND (
-          -- Normal case: queued, not started, and no pending retry
-          ("startedSendingAt" IS NULL AND "isQueued" = TRUE AND "nextSendRetryAt" IS NULL)
-          OR
-          -- Retry case: past retry time
-          ("nextSendRetryAt" IS NOT NULL AND "nextSendRetryAt" <= NOW())
-        )
+        AND "startedSendingAt" IS NULL
+        AND "isQueued" = TRUE
       ORDER BY "priority" DESC, "scheduledAt" ASC, "createdAt" ASC
       LIMIT ${limit}
       FOR UPDATE SKIP LOCKED
     )
     UPDATE "EmailOutbox" AS e
-    SET 
-      "startedSendingAt" = NOW(),
-      "nextSendRetryAt" = NULL
+    SET "startedSendingAt" = NOW()
     FROM selected
     WHERE e."tenancyId" = selected."tenancyId" AND e."id" = selected."id"
     RETURNING e.*;
@@ -724,8 +730,8 @@ async function processSingleEmail(context: TenancyProcessingContext, row: EmailO
             finishedSendingAt: null,
           },
           data: {
-            startedSendingAt: null, // Unclaim the email
-            isQueued: false, // Prevent normal queue path from picking it up
+            startedSendingAt: null,
+            isQueued: false,
             sendRetries: newAttemptCount,
             nextSendRetryAt: new Date(Date.now() + backoffMs),
             sendAttemptErrors: updatedErrors as Prisma.InputJsonArray,
diff --git a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
index 24d8d4aabf..50b23f17de 100644
--- a/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
+++ b/apps/e2e/tests/backend/endpoints/api/v1/emails/email-queue.test.ts
@@ -1,4 +1,4 @@
-import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
+import { StackAssertionError, throwErr } from "@stackframe/stack-shared/dist/utils/errors";
 import { wait } from "@stackframe/stack-shared/dist/utils/promises";
 import { deindent, nicify } from "@stackframe/stack-shared/dist/utils/strings";
 import beautify from "js-beautify";
@@ -1871,7 +1871,7 @@ let tempFailPort: number | null = null;
 
 async function startTempFailSmtpServer(): Promise<number> {
   if (tempFailServer) {
-    return tempFailPort!;
+    return tempFailPort ?? throwErr("tempFailServer not initialized");
   }
 
   return await new Promise((resolve, reject) => {
@@ -1903,7 +1903,7 @@ async function startTempFailSmtpServer(): Promise<number> {
     });
 
     tempFailServer.listen(0, '127.0.0.1', () => {
-      const address = tempFailServer!.address();
+      const address = (tempFailServer ?? throwErr("tempFailServer unexpectedly null in listen callback")).address();
       if (typeof address === 'object' && address !== null) {
         tempFailPort = address.port;
         resolve(tempFailPort);
@@ -1968,8 +1968,8 @@ async function getOutboxEmailById(emailId: string): Promise<OutboxEmailWithRetry
 
 // Helper to poll until an email with the given subject appears in the outbox
 async function waitForOutboxEmail(subject: string, timeoutMs = 30000): Promise<OutboxEmailWithRetryFields> {
-  const startTime = Date.now();
-  while (Date.now() - startTime < timeoutMs) {
+  const startTime = performance.now();
+  while (performance.now() - startTime < timeoutMs) {
     const emails = await getOutboxEmails({ subject });
     if (emails.length > 0) {
       return await getOutboxEmailById(emails[0].id);
@@ -1985,8 +1985,8 @@ async function waitForOutboxEmail(subject: string, timeoutMs = 30000): Promise<O
 // Helper to poll until the email has reached a specific send_retries
 // Note: Status may be "queued" or "sending" due to race conditions - that's expected
 async function waitForAttemptCount(emailId: string, attemptCount: number, timeoutMs = 60000): Promise<OutboxEmailWithRetryFields> {
-  const startTime = Date.now();
-  while (Date.now() - startTime < timeoutMs) {
+  const startTime = performance.now();
+  while (performance.now() - startTime < timeoutMs) {
     const email = await getOutboxEmailById(emailId);
     if (email.send_retries >= attemptCount) {
       return email;
@@ -2043,9 +2043,9 @@ describe("email queue deferred retry logic", () => {
 
     // Wait for the email to reach server-error status
     const maxWaitMs = 30000;
-    const startTime = Date.now();
+    const startTime = performance.now();
     let email = initialEmail;
-    while (Date.now() - startTime < maxWaitMs && email.status !== "server-error") {
+    while (performance.now() - startTime < maxWaitMs && email.status !== "server-error") {
       await wait(500);
       email = await getOutboxEmailById(emailId);
     }
@@ -2124,8 +2124,11 @@ describe("email queue deferred retry logic", () => {
       expect(emailAfterFirstAttempt.send_attempt_errors?.[0].external_message).toContain("450");
       expect(emailAfterFirstAttempt.status).not.toBe("server-error");
 
-      // Status should be "scheduled" (isQueued=false, waiting for nextSendRetryAt) or "sending" (next iteration already picked it up)
-      expect(["scheduled", "sending"]).toContain(emailAfterFirstAttempt.status);
+      // Status could be:
+      // - "scheduled": isQueued=false, waiting for nextSendRetryAt to pass
+      // - "queued": queueReadyEmails() ran, isQueued=true, waiting to be claimed
+      // - "sending": next iteration already picked it up
+      expect(["scheduled", "queued", "sending"]).toContain(emailAfterFirstAttempt.status);
 
       logIfTestFails("Email after first retry attempt", emailAfterFirstAttempt);
     });
@@ -2170,9 +2173,9 @@ describe("email queue deferred retry logic", () => {
       // Wait for all retries to exhaust (MAX_SEND_ATTEMPTS = 5)
       // With 450 errors (immediate) + exponential backoff (2s base * 2^attempt), worst case ~90s
       const maxWaitMs = 120000;
-      const startTime = Date.now();
+      const startTime = performance.now();
       let email = await getOutboxEmailById(emailId);
-      while (Date.now() - startTime < maxWaitMs && email.status !== "server-error") {
+      while (performance.now() - startTime < maxWaitMs && email.status !== "server-error") {
         await wait(1000);
         email = await getOutboxEmailById(emailId);
       }

From 725361e941e45a3c426be3bf4e1a6f333928fbe1 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Thu, 12 Feb 2026 17:16:27 -0800
Subject: [PATCH 10/12] feat: add new index, constraints on isQueued

Our queuing logic will be slow if we dont use an index.
isQueued is most of the time true, since emailoutbox table accounts for historical data and all sent emails have isQueued set to true.
So the index will actually be quite selective.
We can't just use the partial index on isQueued because its build on tenancyId, and the queuing query doesnt check tenancy id.
---
 .../migration.sql                                           | 6 ++++++
 apps/backend/prisma/schema.prisma                           | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 apps/backend/prisma/migrations/20260213004424_email_outbox_is_queued_index/migration.sql

diff --git a/apps/backend/prisma/migrations/20260213004424_email_outbox_is_queued_index/migration.sql b/apps/backend/prisma/migrations/20260213004424_email_outbox_is_queued_index/migration.sql
new file mode 100644
index 0000000000..49f0db985e
--- /dev/null
+++ b/apps/backend/prisma/migrations/20260213004424_email_outbox_is_queued_index/migration.sql
@@ -0,0 +1,6 @@
+-- SPLIT_STATEMENT_SENTINEL
+-- SINGLE_STATEMENT_SENTINEL
+-- RUN_OUTSIDE_TRANSACTION_SENTINEL
+-- Index on isQueued for efficient queueReadyEmails() queries.
+-- Most emails have isQueued=TRUE (already processed), so filtering for FALSE is highly selective.
+CREATE INDEX CONCURRENTLY IF NOT EXISTS "EmailOutbox_isQueued_idx" ON /* SCHEMA_NAME_SENTINEL */."EmailOutbox" ("isQueued");
diff --git a/apps/backend/prisma/schema.prisma b/apps/backend/prisma/schema.prisma
index 6d608df609..df43c5c39c 100644
--- a/apps/backend/prisma/schema.prisma
+++ b/apps/backend/prisma/schema.prisma
@@ -833,7 +833,7 @@ model EmailOutbox {
   // The scheduled time of when the email should be added to the queue. Can be edited, but only if the email has not yet started sending. Doing so should set isQueued to false.
   scheduledAt DateTime
 
-  // The scheduled time of the email if it is in the future.
+  // Whether the email has been queued for sending. Once queued, this stays true unless the email is retried or rescheduled.
   isQueued Boolean @default(false)
 
   // A generated column that is equal to scheduledAt if isQueued is false, otherwise null. See the note above on EmailOutboxStatus.status for more details on dbgenerated values.
@@ -886,6 +886,7 @@ model EmailOutbox {
   @@index([tenancyId, finishedSendingAt(sort: Desc), scheduledAtIfNotYetQueued(sort: Desc), priority, id], map: "EmailOutbox_ordering_idx")
   @@index([tenancyId, simpleStatus], map: "EmailOutbox_simple_status_tenancy_idx")
   @@index([tenancyId, status], map: "EmailOutbox_status_tenancy_idx")
+  @@index([isQueued], map: "EmailOutbox_isQueued_idx")
 }
 
 model EmailOutboxProcessingMetadata {

From de35c1c4422364eda6358788995a8ab9a46ec05e Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Thu, 12 Feb 2026 17:23:25 -0800
Subject: [PATCH 11/12] refactor: split queuing query into two

Rather than using an OR which may/may not use the index correctly,
we use two queries.
---
 apps/backend/src/lib/email-queue-step.tsx | 36 +++++++++++++----------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/apps/backend/src/lib/email-queue-step.tsx b/apps/backend/src/lib/email-queue-step.tsx
index 3200770308..e120b34534 100644
--- a/apps/backend/src/lib/email-queue-step.tsx
+++ b/apps/backend/src/lib/email-queue-step.tsx
@@ -502,15 +502,26 @@ async function renderTenancyEmails(workerId: string, tenancyId: string, group: E
 }
 
 async function queueReadyEmails(): Promise<{ queuedCount: number }> {
-  // Queue emails that are ready to send:
-  // - Fresh emails: scheduledAt has passed and no retry pending
-  // - Retry emails: both scheduledAt AND nextSendRetryAt have passed
-  //
+  // Queue emails that are ready to send. Split into two queries for clarity and index usage.
   // We always require scheduledAt <= NOW() to respect the original scheduling intent.
-  // nextSendRetryAt should not bypass scheduledAt (e.g., if data is corrupted or manually edited).
-  //
+
+  // Query 1: Fresh emails (scheduledAt has passed, no retry pending)
+  const freshEmails = await globalPrismaClient.$queryRaw<{ id: string }[]>`
+    UPDATE "EmailOutbox"
+    SET "isQueued" = TRUE
+    WHERE "isQueued" = FALSE
+      AND "isPaused" = FALSE
+      AND "skippedReason" IS NULL
+      AND "finishedRenderingAt" IS NOT NULL
+      AND "renderedHtml" IS NOT NULL
+      AND "scheduledAt" <= NOW()
+      AND "nextSendRetryAt" IS NULL
+    RETURNING "id";
+  `;
+
+  // Query 2: Retry emails (both scheduledAt AND nextSendRetryAt have passed)
   // Clear nextSendRetryAt when queuing so the email is in a clean "queued" state.
-  const res = await globalPrismaClient.$queryRaw<{ id: string }[]>`
+  const retryEmails = await globalPrismaClient.$queryRaw<{ id: string }[]>`
     UPDATE "EmailOutbox"
     SET "isQueued" = TRUE, "nextSendRetryAt" = NULL
     WHERE "isQueued" = FALSE
@@ -519,17 +530,12 @@ async function queueReadyEmails(): Promise<{ queuedCount: number }> {
       AND "finishedRenderingAt" IS NOT NULL
       AND "renderedHtml" IS NOT NULL
       AND "scheduledAt" <= NOW()
-      AND (
-        -- Fresh emails: no retry pending
-        "nextSendRetryAt" IS NULL
-        OR
-        -- Retry emails: retry time has passed
-        "nextSendRetryAt" <= NOW()
-      )
+      AND "nextSendRetryAt" <= NOW()
     RETURNING "id";
   `;
+
   return {
-    queuedCount: res.length,
+    queuedCount: freshEmails.length + retryEmails.length,
   };
 }
 

From 091616af93c5df91088b8b293f2d04a20a999797 Mon Sep 17 00:00:00 2001
From: nams1570 <amanganapathy@gmail.com>
Date: Fri, 13 Feb 2026 11:37:50 -0800
Subject: [PATCH 12/12] fix: decouple validation and constraint adding for db

Without the not valid, the migration tries to create the constraint and validate it at the same time which can lock the entire table.
We decouple the validation and the constraint creation to avoid transaction timeouts
---
 AGENTS.md                                                 | 1 +
 .../20260210000000_deferred_email_retry/migration.sql     | 8 +++++---
 .../migration.sql                                         | 7 +++++++
 3 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 apps/backend/prisma/migrations/20260210000001_deferred_email_retry_validate/migration.sql

diff --git a/AGENTS.md b/AGENTS.md
index c25b62c3ef..de951248e0 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -101,6 +101,7 @@ To see all development ports, refer to the index.html of `apps/dev-launchpad/pub
 - Fail early, fail loud. Fail fast with an error instead of silently continuing.
 - Do NOT use `as`/`any`/type casts or anything else like that to bypass the type system unless you specifically asked the user about it. Most of the time a place where you would use type casts is not one where you actually need them. Avoid wherever possible.
 - When writing database migration files, assume that we have >1,000,000 rows in every table (unless otherwise specified). This means you may have to use CONDITIONALLY_REPEAT_MIGRATION_SENTINEL to avoid running the migration and things like concurrent index builds; see the existing migrations for examples.
+- Each migration file runs in its own transaction with a relatively short timeout. Split long-running operations into separate migration files to avoid timeouts. For example, when adding CHECK constraints, use `NOT VALID` in one migration, then `VALIDATE CONSTRAINT` in a separate migration file.
 - **When building frontend code, always carefully deal with loading and error states.** Be very explicit with these; some components make this easy, eg. the button onClick already takes an async callback for loading state, but make sure this is done everywhere, and make sure errors are NEVER just silently swallowed.
 - Unless very clearly equivalent from types, prefer explicit null/undefinedness checks over boolean checks, eg. `foo == null` instead of `!foo`.
 
diff --git a/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql b/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
index 46243b8b79..1b54a669ae 100644
--- a/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
+++ b/apps/backend/prisma/migrations/20260210000000_deferred_email_retry/migration.sql
@@ -9,17 +9,19 @@ ALTER TABLE "EmailOutbox"
 
 -- Constraint: nextSendRetryAt can only be set after at least one failed attempt
 -- (if sendRetries is 0, no attempt has failed, so there's nothing to retry)
+-- Use NOT VALID to avoid holding ACCESS EXCLUSIVE lock during full-table validation.
+-- Validation happens in a separate migration to avoid transaction timeout.
 ALTER TABLE "EmailOutbox"
   ADD CONSTRAINT "EmailOutbox_nextSendRetryAt_requires_failure"
-  CHECK ("nextSendRetryAt" IS NULL OR "sendRetries" > 0);
+  CHECK ("nextSendRetryAt" IS NULL OR "sendRetries" > 0) NOT VALID;
 
 -- Constraint: sendAttemptErrors can only be set after at least one failed attempt
 ALTER TABLE "EmailOutbox"
   ADD CONSTRAINT "EmailOutbox_sendAttemptErrors_requires_failure"
-  CHECK ("sendAttemptErrors" IS NULL OR "sendRetries" > 0);
+  CHECK ("sendAttemptErrors" IS NULL OR "sendRetries" > 0) NOT VALID;
 
 -- Constraint: nextSendRetryAt must be null when email has finished sending
 -- (if finishedSendingAt is set, there's nothing more to retry)
 ALTER TABLE "EmailOutbox"
   ADD CONSTRAINT "EmailOutbox_no_retry_after_finished"
-  CHECK ("finishedSendingAt" IS NULL OR "nextSendRetryAt" IS NULL);
+  CHECK ("finishedSendingAt" IS NULL OR "nextSendRetryAt" IS NULL) NOT VALID;
diff --git a/apps/backend/prisma/migrations/20260210000001_deferred_email_retry_validate/migration.sql b/apps/backend/prisma/migrations/20260210000001_deferred_email_retry_validate/migration.sql
new file mode 100644
index 0000000000..db7491d19b
--- /dev/null
+++ b/apps/backend/prisma/migrations/20260210000001_deferred_email_retry_validate/migration.sql
@@ -0,0 +1,7 @@
+-- Validate the deferred retry constraints added in the previous migration.
+-- This runs in a separate transaction to avoid timeout, and only takes
+-- SHARE UPDATE EXCLUSIVE lock (allows concurrent reads/writes).
+
+ALTER TABLE "EmailOutbox" VALIDATE CONSTRAINT "EmailOutbox_nextSendRetryAt_requires_failure";
+ALTER TABLE "EmailOutbox" VALIDATE CONSTRAINT "EmailOutbox_sendAttemptErrors_requires_failure";
+ALTER TABLE "EmailOutbox" VALIDATE CONSTRAINT "EmailOutbox_no_retry_after_finished";