11import type { OutgoingRequest } from "@/generated/prisma/client" ;
22import { getExternalDbSyncFusebox } from "@/lib/external-db-sync-metadata" ;
3+ import { recoverStaleOutgoingRequests , type RecoverStaleResult } from "@/lib/external-db-sync-queue" ;
34import { upstash } from "@/lib/upstash" ;
45import { globalPrismaClient , retryTransaction } from "@/prisma-client" ;
56import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler" ;
@@ -85,13 +86,11 @@ export const GET = createSmartRouteHandler({
8586 const startTime = performance . now ( ) ;
8687 const maxDurationMs = parseMaxDurationMs ( query . maxDurationMs ) ;
8788 const pollIntervalMs = 50 ;
88- const staleClaimIntervalMinutes = 5 ;
8989 const pollerClaimLimit = getPollerClaimLimit ( ) ;
9090
9191 span . setAttribute ( "stack.external-db-sync.max-duration-ms" , maxDurationMs ) ;
9292 span . setAttribute ( "stack.external-db-sync.poll-interval-ms" , pollIntervalMs ) ;
9393 span . setAttribute ( "stack.external-db-sync.poller-claim-limit" , pollerClaimLimit ) ;
94- span . setAttribute ( "stack.external-db-sync.stale-claim-minutes" , staleClaimIntervalMinutes ) ;
9594
9695 let totalRequestsProcessed = 0 ;
9796 let iterationCount = 0 ;
@@ -115,6 +114,55 @@ export const GET = createSmartRouteHandler({
115114 } ) ;
116115 }
117116
/**
 * Runs stale-claim recovery for outgoing requests inside its own trace span
 * and reports what was recovered.
 *
 * Steps:
 *  1. Calls `recoverStaleOutgoingRequests(STALE_REQUEST_THRESHOLD_MS)`.
 *  2. Records the number of reset and deleted rows as span attributes.
 *  3. If at least one row was recovered, reports a `StackAssertionError`
 *     through `captureError`, attaching at most `ID_SAMPLE_LIMIT` ids per
 *     category so the payload stays bounded.
 *
 * A throw from `recoverStaleOutgoingRequests` is caught here: the span is
 * flagged, the error is captured, and an empty result is returned so the
 * caller can carry on with the rest of its iteration.
 *
 * @returns The ids that were reset and the ids that were deleted; both lists
 *          are empty when the recovery call threw.
 */
async function handleStaleRequests(): Promise<RecoverStaleResult> {
  return await traceSpan("external-db-sync.poller.handleStaleRequests", async (staleSpan) => {
    // Best-effort step: a failure in recovery is reported but deliberately not
    // propagated, since the surrounding loop still has the pending queue to process.
    let recovered: RecoverStaleResult;
    try {
      recovered = await recoverStaleOutgoingRequests(STALE_REQUEST_THRESHOLD_MS);
    } catch (error) {
      staleSpan.setAttribute("stack.external-db-sync.stale-recovery-error", true);
      captureError("poller-stale-recovery-error", error);
      return { resetIds: [], deletedIds: [] };
    }

    const { resetIds, deletedIds } = recovered;
    const resetCount = resetIds.length;
    const deletedCount = deletedIds.length;
    const recoveredCount = resetCount + deletedCount;

    staleSpan.setAttribute("stack.external-db-sync.stale-reset-count", resetCount);
    staleSpan.setAttribute("stack.external-db-sync.stale-deleted-count", deletedCount);

    // Nothing was stale: no error report is needed.
    if (recoveredCount === 0) {
      return { resetIds, deletedIds };
    }

    // Cap on how many ids of each category are attached to the error report.
    const ID_SAMPLE_LIMIT = 10;

    const explanation = [
      `Recovered ${recoveredCount} stale outgoing request(s) (reset=${resetCount} , deleted=${deletedCount} ) older than ${STALE_REQUEST_THRESHOLD_MS} ms.`,
      `Stale rows are claims that never got cleared after publishing — the most likely cause is a poller lambda dying between the UPDATE that set startedFulfillingAt and the DELETE that should have removed the row.`,
      `Recovery deletes the stale row if any active sibling (pending OR fresh-in-flight) already represents the work; among multiple stale rows for the same deduplicationKey it resets the oldest and deletes the rest; otherwise it resets startedFulfillingAt to NULL so the row can be re-claimed.`,
      `If this fires repeatedly, look for nearby unhandled-promise-rejection events (which trigger process.exit(1) via the polyfill) or function-timeout signals on the external-db-sync poller route.`,
    ].join(" ");

    const details = {
      totalRecovered: recoveredCount,
      staleResetCount: resetCount,
      staleDeletedCount: deletedCount,
      staleResetIdsSample: resetIds.slice(0, ID_SAMPLE_LIMIT),
      staleDeletedIdsSample: deletedIds.slice(0, ID_SAMPLE_LIMIT),
      // The notes make it explicit whether the samples above were truncated.
      staleResetIdsSampleNote: resetCount > ID_SAMPLE_LIMIT
        ? `Showing first ${ID_SAMPLE_LIMIT} of ${resetCount} reset ids; remainder omitted to bound payload size.`
        : `All ${resetCount} reset ids included.`,
      staleDeletedIdsSampleNote: deletedCount > ID_SAMPLE_LIMIT
        ? `Showing first ${ID_SAMPLE_LIMIT} of ${deletedCount} deleted ids; remainder omitted to bound payload size.`
        : `All ${deletedCount} deleted ids included.`,
    };

    captureError("poller-stale-outgoing-requests", new StackAssertionError(explanation, details));

    return { resetIds, deletedIds };
  });
}
165+
118166 async function deleteOutgoingRequest ( id : string ) : Promise < void > {
119167 await retryTransaction ( globalPrismaClient , async ( tx ) => {
120168 await tx . outgoingRequest . delete ( { where : { id } } ) ;
@@ -205,6 +253,9 @@ export const GET = createSmartRouteHandler({
205253 }
206254
207255 if ( requests . length === 0 ) {
256+ // Performance optimization: skip the upstash batch call when the
257+ // caller passed no claimed rows (i.e. claimPendingRequests returned
258+ // an empty array).
208259 processSpan . setAttribute ( "stack.external-db-sync.processed-count" , 0 ) ;
209260 return 0 ;
210261 }
@@ -243,23 +294,9 @@ export const GET = createSmartRouteHandler({
243294 return { stopReason : "disabled" , processed : 0 } ;
244295 }
245296
246- const staleRequests = await globalPrismaClient . $queryRaw < { id : string , startedFulfillingAt : Date } [ ] > `
247- SELECT "id", "startedFulfillingAt"
248- FROM "OutgoingRequest"
249- WHERE "startedFulfillingAt" IS NOT NULL
250- AND "startedFulfillingAt" < NOW() - ${ STALE_REQUEST_THRESHOLD_MS } * INTERVAL '1 millisecond'
251- LIMIT 10
252- ` ;
253- iterationSpan . setAttribute ( "stack.external-db-sync.stale-count" , staleRequests . length ) ;
254- if ( staleRequests . length > 0 ) {
255- captureError (
256- "poller-stale-outgoing-requests" ,
257- new StackAssertionError (
258- `Found ${ staleRequests . length } outgoing request(s) with startedFulfillingAt older than ${ STALE_REQUEST_THRESHOLD_MS } ms` ,
259- { staleRequestIds : staleRequests . map ( r => r . id ) } ,
260- ) ,
261- ) ;
262- }
297+ const stale = await handleStaleRequests ( ) ;
298+ iterationSpan . setAttribute ( "stack.external-db-sync.stale-reset-count" , stale . resetIds . length ) ;
299+ iterationSpan . setAttribute ( "stack.external-db-sync.stale-deleted-count" , stale . deletedIds . length ) ;
263300
264301 const pendingRequests = await claimPendingRequests ( ) ;
265302 iterationSpan . setAttribute ( "stack.external-db-sync.pending-count" , pendingRequests . length ) ;
0 commit comments