livekit · u9g · Apr 28, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/.changeset/dynamic-endpointing-node-port.md b/.changeset/dynamic-endpointing-node-port.md
@@ -0,0 +1,5 @@
+---
+"@livekit/agents": patch
+---
+
+feat: Add dynamic endpointing for voice turn handling
diff --git a/agents/src/utils.ts b/agents/src/utils.ts
@@ -352,43 +352,97 @@ export class AsyncIterableQueue<T> implements AsyncIterableIterator<T> {
 
 /** @internal */
 export class ExpFilter {
-  #alpha: number;
-  #max?: number;
-  #filtered?: number = undefined;
+  private _alpha: number;
+  private _filtered: number | undefined;
+  private _maxVal: number | undefined;
+  private _minVal: number | undefined;
+
+  // Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 5-20 lines
+  constructor(alpha: number, maxVal?: number, minVal?: number, initial?: number) {
+    if (!(alpha > 0 && alpha <= 1)) {
+      throw new Error('alpha must be in (0, 1].');
+    }
 
-  constructor(alpha: number, max?: number) {
-    this.#alpha = alpha;
-    this.#max = max;
+    this._alpha = alpha;
+    this._filtered = initial;
+    this._maxVal = maxVal;
+    this._minVal = minVal;
+  }
+
+  // Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 21-37 lines
+  reset({
+    alpha,
+    initial,
+    minVal,
+    maxVal,
+  }: {
+    alpha?: number;
+    initial?: number;
+    minVal?: number;
+    maxVal?: number;
+  } = {}): void {
+    if (alpha !== undefined) {
+      if (!(alpha > 0 && alpha <= 1)) {
+        throw new Error('alpha must be in (0, 1].');
+      }
+      this._alpha = alpha;
+    }
+    if (initial !== undefined) {
+      this._filtered = initial;
+    }
+    if (minVal !== undefined) {
+      this._minVal = minVal;
+    }
+    if (maxVal !== undefined) {
+      this._maxVal = maxVal;
+    }
   }
 
-  reset(alpha?: number) {
-    if (alpha) {
-      this.#alpha = alpha;
+  // Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 38-57 lines
+  apply(exp: number, sample?: number): number {
+    sample ??= this._filtered;
+
+    if (sample !== undefined && this._filtered === undefined) {
+      this._filtered = sample;
+    } else if (sample !== undefined && this._filtered !== undefined) {
+      const a = this._alpha ** exp;
+      this._filtered = a * this._filtered + (1 - a) * sample;
     }
-    this.#filtered = undefined;
-  }
 
-  apply(exp: number, sample: number): number {
-    if (this.#filtered) {
-      const a = this.#alpha ** exp;
-      this.#filtered = a * this.#filtered + (1 - a) * sample;
-    } else {
-      this.#filtered = sample;
+    if (this._filtered === undefined) {
+      throw new Error('sample or initial value must be given.');
+    }
+
+    if (this._maxVal !== undefined && this._filtered > this._maxVal) {
+      this._filtered = this._maxVal;
     }
 
-    if (this.#max && this.#filtered > this.#max) {
-      this.#filtered = this.#max;
+    if (this._minVal !== undefined && this._filtered < this._minVal) {
+      this._filtered = this._minVal;
     }
 
-    return this.#filtered;
+    return this._filtered;
+  }
+
+  // Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 59-61 lines
+  get value(): number | undefined {
+    return this._filtered;
   }
 
   get filtered(): number | undefined {
-    return this.#filtered;
+    return this.value;
   }
 
   set alpha(alpha: number) {
-    this.#alpha = alpha;
+    if (!(alpha > 0 && alpha <= 1)) {
+      throw new Error('alpha must be in (0, 1].');
+    }
+    this._alpha = alpha;
+  }
+
+  // Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 63-64 lines
+  updateBase(alpha: number): void {
+    this._alpha = alpha;
   }
 }
 

diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts
@@ -65,6 +65,7 @@ import {
   type RecognitionHooks,
   type STTPipeline,
 } from './audio_recognition.js';
+import { createEndpointing } from './endpointing.js';
 import type { AgentState } from './events.js';
 import {
   AgentSessionEventTypes,
@@ -90,6 +91,8 @@ import {
 } from './generation.js';
 import type { TimedString } from './io.js';
 import { SpeechHandle } from './speech_handle.js';
+import type { EndpointingOptions } from './turn_config/endpointing.js';
+import { stripUndefined } from './turn_config/utils.js';
 import { setParticipantSpanAttributes } from './utils.js';
 
 export const agentActivityStorage = new AsyncLocalStorage<AgentActivity>();
@@ -195,6 +198,7 @@ export class AgentActivity implements RecognitionHooks {
   private isInterruptionDetectionEnabled: boolean;
   private isInterruptionByAudioActivityEnabled: boolean;
   private isDefaultInterruptionByAudioActivityEnabled: boolean;
+  private interruptionDetected = false;
 
   // for false interruption handling
   private pausedSpeech?: PausedSpeechInfo;
@@ -218,6 +222,8 @@ export class AgentActivity implements RecognitionHooks {
     this.onError(ev);
 
   private readonly onInterruptionOverlappingSpeech = (ev: OverlappingSpeechEvent): void => {
+    // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1503-1508 lines
+    this.interruptionDetected = ev.isInterruption;
     this.agentSession.emit(AgentSessionEventTypes.OverlappingSpeech, ev);
   };
 
@@ -489,12 +495,8 @@ export class AgentActivity implements RecognitionHooks {
       turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection,
       turnDetectionMode: this.turnDetectionMode,
       interruptionDetection: this.interruptionDetector,
-      minEndpointingDelay:
-        this.agent.turnHandling?.endpointing?.minDelay ??
-        this.agentSession.sessionOptions.turnHandling.endpointing.minDelay,
-      maxEndpointingDelay:
-        this.agent.turnHandling?.endpointing?.maxDelay ??
-        this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay,
+      // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 779-789 lines
+      endpointing: createEndpointing(this.endpointingOptions),
       rootSpanContext: this.agentSession.rootSpanContext,
       sttModel: this.stt?.label,
       sttProvider: this.getSttProvider(),
@@ -673,19 +675,13 @@ export class AgentActivity implements RecognitionHooks {
     return this.agent.turnHandling ?? this.agentSession.sessionOptions.turnHandling;
   }
 
-  // get minEndpointingDelay(): number {
-  //   return (
-  //     this.agent.turnHandling?.endpointing?.minDelay ??
-  //     this.agentSession.sessionOptions.turnHandling.endpointing.minDelay
-  //   );
-  // }
-
-  // get maxEndpointingDelay(): number {
-  //   return (
-  //     this.agent.turnHandling?.endpointing?.maxDelay ??
-  //     this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay
-  //   );
-  // }
+  private get endpointingOptions(): EndpointingOptions {
+    const agentEndpointing = this.agent.turnHandling?.endpointing;
+    return {
+      ...this.agentSession.sessionOptions.turnHandling.endpointing,
+      ...(agentEndpointing ? stripUndefined(agentEndpointing) : {}),
+    };
+  }
 
   get toolCtx(): ToolContext {
     return this.agent.toolCtx;
@@ -730,9 +726,11 @@ export class AgentActivity implements RecognitionHooks {
   updateOptions({
     toolChoice,
     turnDetection,
+    endpointing,
   }: {
     toolChoice?: ToolChoice | null;
     turnDetection?: TurnDetectionMode;
+    endpointing?: EndpointingOptions;
   }): void {
     if (toolChoice !== undefined) {
       this.toolChoice = toolChoice;
@@ -755,7 +753,11 @@ export class AgentActivity implements RecognitionHooks {
     }
 
     if (this.audioRecognition) {
-      this.audioRecognition.updateOptions({ turnDetection: this.turnDetectionMode });
+      // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 487-493 lines
+      this.audioRecognition.updateOptions({
+        endpointing: endpointing !== undefined ? createEndpointing(endpointing) : undefined,
+        turnDetection: this.turnDetectionMode,
+      });
     }
   }
 
@@ -934,12 +936,9 @@ export class AgentActivity implements RecognitionHooks {
 
     if (!this.vad) {
       this.agentSession._updateUserState('speaking');
-      if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
-        this.audioRecognition.onStartOfOverlapSpeech(
-          0,
-          Date.now(),
-          this.agentSession._userSpeakingSpan,
-        );
+      if (this.audioRecognition) {
+        // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1510-1517 lines
+        this.audioRecognition.onStartOfSpeech(Date.now(), 0, this.agentSession._userSpeakingSpan);
       }
     }
 
@@ -959,8 +958,9 @@ export class AgentActivity implements RecognitionHooks {
     this.logger.info(ev, 'onInputSpeechStopped');
 
     if (!this.vad) {
-      if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
-        this.audioRecognition.onEndOfOverlapSpeech(Date.now(), this.agentSession._userSpeakingSpan);
+      if (this.audioRecognition) {
+        // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1528-1535 lines
+        this.audioRecognition.onEndOfSpeech(Date.now(), this.agentSession._userSpeakingSpan);
       }
       this.agentSession._updateUserState('listening');
     }
@@ -1042,14 +1042,17 @@ export class AgentActivity implements RecognitionHooks {
       lastSpeakingTime: speechStartTime,
       otelContext: otelContext.active(),
     });
-    if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
+    if (this.audioRecognition) {
       // Pass speechStartTime as the absolute startedAt timestamp.
-      this.audioRecognition.onStartOfOverlapSpeech(
-        ev.speechDuration,
+      // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1662-1673 lines
+      this.audioRecognition.onStartOfSpeech(
         speechStartTime,
+        ev.speechDuration,
         this.agentSession._userSpeakingSpan,
       );
     }
+    // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1674-1676 lines
+    this.interruptionDetected = false;
 
     if (this.falseInterruptionTimer) {
       // cancel the timer when user starts speaking but leave the paused state unchanged
@@ -1080,11 +1083,13 @@ export class AgentActivity implements RecognitionHooks {
       // Subtract both silenceDuration and inferenceDuration to correct for VAD model latency.
       speechEndTime = speechEndTime - ev.silenceDuration - ev.inferenceDuration;
     }
-    if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
+    if (this.audioRecognition) {
       // Pass speechEndTime as the absolute endedAt timestamp.
-      this.audioRecognition.onEndOfOverlapSpeech(
+      // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1698-1712 lines
+      this.audioRecognition.onEndOfSpeech(
         speechEndTime,
         this.agentSession._userSpeakingSpan,
+        this.isInterruptionDetectionEnabled ? this.interruptionDetected : undefined,
       );
     }
     this.agentSession._updateUserState('listening', {
@@ -1167,15 +1172,12 @@ export class AgentActivity implements RecognitionHooks {
         const audioOutput = this.agentSession.output.audio;
 
         if (
-          this.isInterruptionDetectionEnabled &&
           this.audioRecognition &&
+          !this.audioRecognition.endpointingOverlapping &&
           this.agentSession.agentState === 'speaking'
         ) {
-          this.audioRecognition.onStartOfOverlapSpeech(
-            0,
-            Date.now(),
-            this.agentSession._userSpeakingSpan,
-          );
+          // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1631-1639 lines
+          this.audioRecognition.onStartOfSpeech(Date.now(), 0, this.agentSession._userSpeakingSpan);
         }
 
         this.updatePausedSpeech(this._currentSpeech, timeout);
@@ -1943,8 +1945,11 @@ export class AgentActivity implements RecognitionHooks {
         startTime: startedSpeakingAt,
         otelContext: speechHandle._agentTurnContext,
       });
-      if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
-        this.audioRecognition.onStartOfAgentSpeech();
+      if (this.audioRecognition) {
+        // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 2223-2231 lines
+        this.audioRecognition.onStartOfAgentSpeech(replyStartedSpeakingAt);
+      }
+      if (this.isInterruptionDetectionEnabled) {
         this.isInterruptionByAudioActivityEnabled = false;
       }
     };
@@ -2036,10 +2041,12 @@ export class AgentActivity implements RecognitionHooks {
 
     if (this.agentSession.agentState === 'speaking') {
       this.agentSession._updateAgentState('listening');
-      if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
+      if (this.audioRecognition) {
         this.audioRecognition.onEndOfAgentSpeech(Date.now());
       }
-      this.restoreInterruptionByAudioActivity();
+      if (this.isInterruptionDetectionEnabled) {
+        this.restoreInterruptionByAudioActivity();
+      }
     }
   }
 
@@ -2227,8 +2234,11 @@ export class AgentActivity implements RecognitionHooks {
         startTime: startedSpeakingAt,
         otelContext: speechHandle._agentTurnContext,
       });
-      if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
-        this.audioRecognition.onStartOfAgentSpeech();
+      if (this.audioRecognition) {
+        // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 2579-2588 lines
+        this.audioRecognition.onStartOfAgentSpeech(agentStartedSpeakingAt);
+      }
+      if (this.isInterruptionDetectionEnabled) {
         this.isInterruptionByAudioActivityEnabled = false;
       }
     };
@@ -2390,8 +2400,10 @@ export class AgentActivity implements RecognitionHooks {
 
       if (this.agentSession.agentState === 'speaking') {
         this.agentSession._updateAgentState('listening');
-        if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
+        if (this.audioRecognition) {
           this.audioRecognition.onEndOfAgentSpeech(Date.now());
+        }
+        if (this.isInterruptionDetectionEnabled) {
           this.restoreInterruptionByAudioActivity();
         }
       }
@@ -2433,11 +2445,11 @@ export class AgentActivity implements RecognitionHooks {
       this.agentSession._updateAgentState('thinking');
     } else if (this.agentSession.agentState === 'speaking') {
       this.agentSession._updateAgentState('listening');
-      if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
-        {
-          this.audioRecognition.onEndOfAgentSpeech(Date.now());
-          this.restoreInterruptionByAudioActivity();
-        }
+      if (this.audioRecognition) {
+        this.audioRecognition.onEndOfAgentSpeech(Date.now());
+      }
+      if (this.isInterruptionDetectionEnabled) {
+        this.restoreInterruptionByAudioActivity();
       }
     }
 
@@ -3390,7 +3402,8 @@ export class AgentActivity implements RecognitionHooks {
           otelContext: this.pausedSpeech.handle._agentTurnContext,
         });
         if (this.audioRecognition && this.pausedSpeech.agentState === 'speaking') {
-          this.audioRecognition.onStartOfAgentSpeech();
+          // Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 3479-3486 lines
+          this.audioRecognition.onStartOfAgentSpeech(Date.now());
         }
         if (this.isInterruptionDetectionEnabled) {
           this.isInterruptionByAudioActivityEnabled = false;