From 2efad257c040ceb138a47007e1d7d695cb87270f Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 13 May 2026 11:28:12 -0500 Subject: [PATCH 1/3] refactor: Clarify already-tracked warnings as skips with remedy Reword the at-most-once warning messages emitted by LDAIConfigTracker and AIGraphTracker. Each warning now states which method is being skipped, what state was already recorded, and tells the caller to invoke create_tracker on the AI Config (or agent graph) to start a new run. This mirrors the doc clarifications recently landed in the Go SDK (launchdarkly/go-server-sdk #363). --- packages/sdk/server-ai/src/ldai/tracker.py | 57 +++++++++++++++++----- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index 16d860f..67231de 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -224,7 +224,11 @@ def track_duration(self, duration: int) -> None: :param duration: Duration in milliseconds. """ if self._summary.duration_ms is not None: - log.warning("Duration has already been tracked for this execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_duration: duration already recorded on this tracker. " + "Call create_tracker on the AI Config for a new run. %s", + self.__get_track_data(), + ) return self._summary._duration_ms = duration self._ld_client.track( @@ -239,7 +243,8 @@ def track_time_to_first_token(self, time_to_first_token: int) -> None: """ if self._summary.time_to_first_token is not None: log.warning( - "Time to first token has already been tracked for this execution. %s", + "Skipping track_time_to_first_token: time-to-first-token already recorded on this tracker. " + "Call create_tracker on the AI Config for a new run. 
%s", self.__get_track_data(), ) return @@ -393,7 +398,11 @@ def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: :param feedback: Dictionary containing feedback kind. """ if self._summary.feedback is not None: - log.warning("Feedback has already been tracked for this execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_feedback: feedback already recorded on this tracker. " + "Call create_tracker on the AI Config for a new run. %s", + self.__get_track_data(), + ) return self._summary._feedback = feedback if feedback["kind"] == FeedbackKind.Positive: @@ -430,7 +439,11 @@ def track_success(self) -> None: Track a successful AI generation. """ if self._summary.success is not None: - log.warning("Success has already been tracked for this execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_success: success/error already recorded on this tracker. " + "Call create_tracker on the AI Config for a new run. %s", + self.__get_track_data(), + ) return self._summary._success = True self._ld_client.track( @@ -442,7 +455,11 @@ def track_error(self) -> None: Track an unsuccessful AI generation attempt. """ if self._summary.success is not None: - log.warning("Success has already been tracked for this execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_error: success/error already recorded on this tracker. " + "Call create_tracker on the AI Config for a new run. %s", + self.__get_track_data(), + ) return self._summary._success = False self._ld_client.track( @@ -478,7 +495,11 @@ def track_tokens(self, tokens: TokenUsage) -> None: :param tokens: Token usage data from either custom, OpenAI, or Bedrock sources. """ if self._summary.tokens is not None: - log.warning("Tokens have already been tracked for this execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_tokens: token usage already recorded on this tracker. " + "Call create_tracker on the AI Config for a new run. 
%s", self.__get_track_data(), ) return self._summary._tokens = tokens td = self.__get_track_data() @@ -608,8 +629,10 @@ def track_invocation_success(self) -> None: """ if self._summary.success is not None: log.warning( - "Invocation status has already been tracked for this graph execution. %s", - self.__get_track_data()) + "Skipping track_invocation_success: invocation result already recorded on this graph tracker. " + "Call create_tracker on the agent graph for a new run. %s", + self.__get_track_data(), + ) return self._summary.success = True self._ld_client.track( @@ -625,8 +648,10 @@ def track_invocation_failure(self) -> None: """ if self._summary.success is not None: log.warning( - "Invocation status has already been tracked for this graph execution. %s", - self.__get_track_data()) + "Skipping track_invocation_failure: invocation result already recorded on this graph tracker. " + "Call create_tracker on the agent graph for a new run. %s", + self.__get_track_data(), + ) return self._summary.success = False self._ld_client.track( @@ -643,7 +668,11 @@ def track_duration(self, duration: int) -> None: :param duration: Duration in milliseconds. """ if self._summary.duration_ms is not None: - log.warning("Duration has already been tracked for this graph execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_duration: duration already recorded on this graph tracker. " + "Call create_tracker on the agent graph for a new run. %s", + self.__get_track_data(), + ) return self._summary.duration_ms = duration self._ld_client.track( @@ -662,7 +691,11 @@ def track_total_tokens(self, tokens: Optional[TokenUsage] = None) -> None: if tokens is None or tokens.total <= 0: return if self._summary.tokens is not None: - log.warning("Token usage has already been tracked for this graph execution. %s", self.__get_track_data()) + log.warning( + "Skipping track_total_tokens: tokens already recorded on this graph tracker. 
" + "Call create_tracker on the agent graph for a new run. %s", + self.__get_track_data(), + ) return self._summary.tokens = tokens self._ld_client.track( From d68bdbffd9003a41435ae77c23f7e51c76382f24 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 13 May 2026 11:31:19 -0500 Subject: [PATCH 2/3] docs: Clarify runId purpose and per-method tracker semantics Expand the LDAIConfigTracker class docstring to explain that all events emitted by a tracker share a runId and that the resumption token preserves it across processes. Document the AIConfig.create_tracker factory: each call mints a new runId, and metrics from different runIds cannot be combined. Add per-method paragraphs to each track_* method (on both LDAIConfigTracker and AIGraphTracker) describing whether they record at-most-once or may be called multiple times. Note that track_success and track_error share state, as do track_invocation_success and track_invocation_failure on the graph tracker. Document that the track_metrics_of wrappers re-run the inner block but emit no extra metric events on subsequent calls. Sweep "execution" / "invocation" docstring and comment uses that meant "a single AI run" and replace with "AI run" or "graph run". Public method and wire-format event names (track_invocation_success/failure, $ld:ai:graph:invocation_*) are unchanged. Fix the README examples to use ai_config.create_tracker() -- the ai_config.tracker attribute no longer exists. Follow-up to launchdarkly/go-server-sdk #363, applying the same doc clarifications to the python SDK. 
--- packages/sdk/server-ai/README.md | 11 ++- packages/sdk/server-ai/src/ldai/models.py | 12 ++- packages/sdk/server-ai/src/ldai/tracker.py | 91 +++++++++++++++++----- 3 files changed, 91 insertions(+), 23 deletions(-) diff --git a/packages/sdk/server-ai/README.md b/packages/sdk/server-ai/README.md index b305c4c..38eb89e 100644 --- a/packages/sdk/server-ai/README.md +++ b/packages/sdk/server-ai/README.md @@ -92,7 +92,7 @@ ai_config = ai_client.completion_config( if ai_config.enabled: messages = ai_config.messages model = ai_config.model - tracker = ai_config.tracker + tracker = ai_config.create_tracker() # Use with your AI provider ``` @@ -156,8 +156,9 @@ async def main(): # Create LangChain model from configuration llm = await LangChainProvider.create_langchain_model(ai_config) - # Use with tracking (sync invoke) - response = ai_config.tracker.track_metrics_of( + # Use with tracking (sync invoke). Mint a tracker once per AI run. + tracker = ai_config.create_tracker() + response = tracker.track_metrics_of( lambda: llm.invoke(messages), lambda result: LangChainProvider.get_ai_metrics_from_response(result) ) @@ -196,7 +197,9 @@ async def main(): temperature=ai_config.model.get_parameter('temperature') if ai_config.model else 0.5, ) - result = await ai_config.tracker.track_metrics_of_async( + # Mint a tracker once per AI run. + tracker = ai_config.create_tracker() + result = await tracker.track_metrics_of_async( call_custom_provider, map_custom_provider_metrics ) diff --git a/packages/sdk/server-ai/src/ldai/models.py b/packages/sdk/server-ai/src/ldai/models.py index fad4b8c..8e89008 100644 --- a/packages/sdk/server-ai/src/ldai/models.py +++ b/packages/sdk/server-ai/src/ldai/models.py @@ -203,10 +203,20 @@ class AIConfig: Instances are always created by the SDK client, which injects a real ``create_tracker`` factory. User code should never need to construct - this directly — use the ``*Default`` variants for default values. 
+ this directly -- use the ``*Default`` variants for default values. + + ``create_tracker`` is a zero-argument callable: each invocation creates a + new tracker for a fresh AI run. Each call mints a new ``runId`` (a UUIDv4) + that LaunchDarkly uses to correlate the run's events in metrics views. + Call it once per AI run; metrics from different ``runId``s cannot be + combined. """ key: str enabled: bool + #: Factory that creates a new tracker for a fresh AI run. Each call mints a + #: new ``runId`` (a UUIDv4) so LaunchDarkly can correlate the run's events + #: in metrics views. Call this once per AI run; metrics from different + #: ``runId``s cannot be combined. create_tracker: Callable[[], Any] model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index 67231de..d6d6a1b 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -84,14 +84,20 @@ def resumption_token(self) -> Optional[str]: """ URL-safe Base64-encoded resumption token captured at tracker instantiation. Useful for deferred feedback flows where a downstream - process needs to associate events with the original execution. + process needs to associate events with the original AI run. """ return self._resumption_token class LDAIConfigTracker: """ - Tracks configuration and usage metrics for LaunchDarkly AI operations. + Records metrics for a single AI run. + + All events a tracker emits share a runId (a UUIDv4) so LaunchDarkly can correlate + them in metrics views. See individual track methods for their specific semantics. + Call ``create_tracker`` on the AI Config to start a new run. A resumption token + preserves the runId, so events emitted by a tracker reconstructed in another + process correlate with the original run. """ def __init__( @@ -110,7 +116,7 @@ def __init__( Initialize an AI Config tracker. 
:param ld_client: LaunchDarkly client instance. - :param run_id: Unique identifier for this execution. + :param run_id: Unique identifier for this AI run. :param config_key: Configuration key for tracking. :param variation_key: Variation key for tracking. :param version: Version of the variation. @@ -162,7 +168,7 @@ def from_resumption_token(cls, token: str, ld_client: LDClient, context: Context This is used for cross-process scenarios such as deferred feedback, where a different service needs to associate tracking events with the - original execution's ``runId``. + original AI run's ``runId``. :param token: A URL-safe Base64-encoded resumption token obtained from :attr:`resumption_token`. @@ -219,7 +225,9 @@ def __get_track_data(self) -> dict: def track_duration(self, duration: int) -> None: """ - Manually track the duration of an AI operation. + Manually track the duration of an AI run. + + Records at most once per Tracker; further calls are ignored. :param duration: Duration in milliseconds. """ @@ -237,7 +245,9 @@ def track_duration(self, duration: int) -> None: def track_time_to_first_token(self, time_to_first_token: int) -> None: """ - Manually track the time to first token of an AI operation. + Manually track the time to first token of an AI run. + + Records at most once per Tracker; further calls are ignored. :param time_to_first_token: Time to first token in milliseconds. """ @@ -258,10 +268,10 @@ def track_time_to_first_token(self, time_to_first_token: int) -> None: def track_duration_of(self, func): """ - Automatically track the duration of an AI operation. + Automatically track the duration of an AI run. - An exception occurring during the execution of the function will still - track the duration. The exception will be re-thrown. + An exception raised while the function runs will still record the + duration. The exception will be re-thrown. :param func: Function to track (synchronous only). :return: Result of the tracked function. 
@@ -322,6 +332,10 @@ def track_metrics_of( non-``None`` ``duration_ms`` field, that value is used as the measured duration instead of the wall-clock elapsed time. + Because each inner metric is at-most-once per Tracker, calling this twice + on the same Tracker will run the inner block again but produce no + additional metric events. + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result :param func: Synchronous callable that runs the operation :return: The result of the operation @@ -353,6 +367,10 @@ async def track_metrics_of_async( non-``None`` ``duration_ms`` field, that value is used as the measured duration instead of the wall-clock elapsed time. + Because each inner metric is at-most-once per Tracker, calling this twice + on the same Tracker will run the inner block again but produce no + additional metric events. + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result :param func: Async callable or zero-arg callable that returns an awaitable when called :return: The result of the operation @@ -375,6 +393,9 @@ def track_judge_result(self, judge_result: Any) -> None: """ Track a judge result, including the evaluation score with judge config key. + May be called multiple times per Tracker; each call records the + provided judge result. + :param judge_result: JudgeResult object containing score, metric key, and success status """ if not judge_result.sampled: @@ -393,7 +414,9 @@ def track_judge_result(self, judge_result: Any) -> None: def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: """ - Track user feedback for an AI operation. + Track user feedback for an AI run. + + Records at most once per Tracker; further calls are ignored. :param feedback: Dictionary containing feedback kind. 
""" if self._summary.feedback is not None: @@ -422,11 +445,14 @@ def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: def track_tool_calls(self, tool_calls: Iterable[str]) -> None: """ - Track the tool calls made during an AI operation. + Track the tool calls made during an AI run. Appends to the summary's tool call list and fires a ``$ld:ai:tool_call`` event for each tool. + May be called multiple times per Tracker; each call records an event + for every tool identifier provided. + :param tool_calls: Tool identifiers (e.g. from a model response). """ tool_calls_list = list(tool_calls) @@ -437,6 +463,10 @@ def track_tool_calls(self, tool_calls: Iterable[str]) -> None: def track_success(self) -> None: """ Track a successful AI generation. + + Records at most once per Tracker. track_success and track_error share + state; only one of the two can record per Tracker, and subsequent calls + are ignored. """ if self._summary.success is not None: log.warning( @@ -453,6 +483,10 @@ def track_success(self) -> None: def track_error(self) -> None: """ Track an unsuccessful AI generation attempt. + + Records at most once per Tracker. track_success and track_error share + state; only one of the two can record per Tracker, and subsequent calls + are ignored. """ if self._summary.success is not None: log.warning( @@ -492,6 +526,8 @@ def track_tokens(self, tokens: TokenUsage) -> None: """ Track token usage metrics. + Records at most once per Tracker; further calls are ignored. + :param tokens: Token usage data from either custom, OpenAI, or Bedrock sources. """ if self._summary.tokens is not None: @@ -527,7 +563,10 @@ def track_tokens(self, tokens: TokenUsage) -> None: def track_tool_call(self, tool_key: str) -> None: """ - Track a tool invocation for this configuration (standalone or within a graph). + Track a tool call for this configuration (standalone or within a graph). + + May be called multiple times per Tracker; each call records a tool + call event for the provided tool key. 
:param tool_key: Identifier of the tool that was invoked. """ @@ -625,7 +664,11 @@ def __get_track_data(self): def track_invocation_success(self) -> None: """ - Track a successful graph invocation. + Track a successful graph run. + + Records at most once per graph tracker. track_invocation_success and + track_invocation_failure share state; only one of the two can record + per graph tracker, and subsequent calls are ignored. """ if self._summary.success is not None: log.warning( @@ -644,7 +687,11 @@ def track_invocation_success(self) -> None: def track_invocation_failure(self) -> None: """ - Track an unsuccessful graph invocation. + Track an unsuccessful graph run. + + Records at most once per graph tracker. track_invocation_success and + track_invocation_failure share state; only one of the two can record + per graph tracker, and subsequent calls are ignored. """ if self._summary.success is not None: log.warning( @@ -663,7 +710,9 @@ def track_invocation_failure(self) -> None: def track_duration(self, duration: int) -> None: """ - Track the total duration of graph execution. + Track the total duration of a graph run. + + Records at most once per graph tracker; further calls are ignored. :param duration: Duration in milliseconds. """ @@ -684,7 +733,9 @@ def track_duration(self, duration: int) -> None: def track_total_tokens(self, tokens: Optional[TokenUsage] = None) -> None: """ - Track aggregated token usage across the entire graph invocation. + Track aggregated token usage across the entire graph run. + + Records at most once per graph tracker; further calls are ignored. :param tokens: Token usage data, or ``None`` when usage is unknown. """ @@ -707,10 +758,14 @@ def track_total_tokens(self, tokens: Optional[TokenUsage] = None) -> None: def track_path(self, path: List[str]) -> None: """ - Track the execution path through the graph. + Track the path traversed through the graph during a graph run. 
Appends to the summary's path list and fires a ``$ld:ai:graph:path`` - event. Can be called multiple times to build the path incrementally. + event. + + May be called multiple times per Tracker; each call records the + provided path segment and appends it to the summary so the full + path can be built incrementally. :param path: An array of configuration keys representing the sequence of nodes executed during graph traversal. """ From 7dd35093316056e820fe7262398d172804989da5 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 13 May 2026 11:57:43 -0500 Subject: [PATCH 3/3] docs: Avoid "run...runId" duplicate phrasing in resumption docs Rephrase the from_resumption_token docstring so the reconstructed tracker is associated with the "original tracker's runId" rather than the "original AI run's runId" -- "run's runId" reads as a duplicate. --- packages/sdk/server-ai/src/ldai/tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index d6d6a1b..3570dc9 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -168,7 +168,7 @@ def from_resumption_token(cls, token: str, ld_client: LDClient, context: Context This is used for cross-process scenarios such as deferred feedback, where a different service needs to associate tracking events with the - original AI run's ``runId``. + original tracker's ``runId``. :param token: A URL-safe Base64-encoded resumption token obtained from :attr:`resumption_token`.