Skip to content

Commit 6611957

Browse files
feat(openai): Set system instruction attribute for Completions API (#5358)
Set the system instruction attribute on `gen_ai.chat` spans generated when using the Completions API in the `OpenAIIntegration`. Extract instructions both from content strings and parts-style content lists.
1 parent 0d42476 commit 6611957

2 files changed

Lines changed: 692 additions & 84 deletions

File tree

sentry_sdk/integrations/openai.py

Lines changed: 182 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import sys
2-
from functools import wraps
32
import time
3+
from functools import wraps
4+
from collections.abc import Iterable
45

56
import sentry_sdk
67
from sentry_sdk import consts
@@ -24,8 +25,20 @@
2425
from typing import TYPE_CHECKING
2526

2627
if TYPE_CHECKING:
27-
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
28+
from typing import (
29+
Any,
30+
List,
31+
Optional,
32+
Callable,
33+
AsyncIterator,
34+
Iterator,
35+
Union,
36+
)
2837
from sentry_sdk.tracing import Span
38+
from sentry_sdk._types import TextPart
39+
40+
from openai.types.responses import ResponseInputParam, ResponseInputItemParam
41+
from openai import Omit
2942

3043
try:
3144
try:
@@ -42,7 +55,11 @@
4255
from openai.resources import Embeddings, AsyncEmbeddings
4356

4457
if TYPE_CHECKING:
45-
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
58+
from openai.types.chat import (
59+
ChatCompletionMessageParam,
60+
ChatCompletionChunk,
61+
ChatCompletionSystemMessageParam,
62+
)
4663
except ImportError:
4764
raise DidNotEnable("OpenAI not installed")
4865

@@ -183,12 +200,48 @@ def _calculate_token_usage(
183200
)
184201

185202

186-
def _set_input_data(
187-
span: "Span",
203+
def _is_system_instruction_completions(message: "ChatCompletionMessageParam") -> bool:
204+
return isinstance(message, dict) and message.get("role") == "system"
205+
206+
207+
def _get_system_instructions_completions(
208+
messages: "Iterable[ChatCompletionMessageParam]",
209+
) -> "list[ChatCompletionMessageParam]":
210+
if not isinstance(messages, Iterable):
211+
return []
212+
213+
return [
214+
message for message in messages if _is_system_instruction_completions(message)
215+
]
216+
217+
218+
def _transform_system_instructions(
219+
system_instructions: "list[ChatCompletionSystemMessageParam]",
220+
) -> "list[TextPart]":
221+
instruction_text_parts: "list[TextPart]" = []
222+
223+
for instruction in system_instructions:
224+
if not isinstance(instruction, dict):
225+
continue
226+
227+
content = instruction.get("content")
228+
229+
if isinstance(content, str):
230+
instruction_text_parts.append({"type": "text", "content": content})
231+
232+
elif isinstance(content, list):
233+
for part in content:
234+
if isinstance(part, dict) and part.get("type") == "text":
235+
text = part.get("text", "")
236+
if text:
237+
instruction_text_parts.append({"type": "text", "content": text})
238+
239+
return instruction_text_parts
240+
241+
242+
def _get_input_messages(
188243
kwargs: "dict[str, Any]",
189-
operation: str,
190-
integration: "OpenAIIntegration",
191-
) -> None:
244+
) -> "Optional[Union[Iterable[Any], list[str]]]":
192245
# Input messages (the prompt or data sent to the model)
193246
messages = kwargs.get("messages")
194247
if messages is None:
@@ -197,29 +250,15 @@ def _set_input_data(
197250
if isinstance(messages, str):
198251
messages = [messages]
199252

200-
if (
201-
messages is not None
202-
and len(messages) > 0
203-
and should_send_default_pii()
204-
and integration.include_prompts
205-
):
206-
normalized_messages = normalize_message_roles(messages)
207-
scope = sentry_sdk.get_current_scope()
208-
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
209-
if messages_data is not None:
210-
# Use appropriate field based on operation type
211-
if operation == "embeddings":
212-
set_data_normalized(
213-
span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False
214-
)
215-
else:
216-
set_data_normalized(
217-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
218-
)
253+
return messages
254+
219255

256+
def _commmon_set_input_data(
257+
span: "Span",
258+
kwargs: "dict[str, Any]",
259+
) -> None:
220260
# Input attributes: Common
221261
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")
222-
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
223262

224263
# Input attributes: Optional
225264
kwargs_keys_to_attributes = {
@@ -245,6 +284,115 @@ def _set_input_data(
245284
)
246285

247286

287+
def _set_responses_api_input_data(
288+
span: "Span",
289+
kwargs: "dict[str, Any]",
290+
integration: "OpenAIIntegration",
291+
) -> None:
292+
messages: "Optional[Union[ResponseInputParam, list[str]]]" = _get_input_messages(
293+
kwargs
294+
)
295+
296+
if (
297+
messages is not None
298+
and len(messages) > 0
299+
and should_send_default_pii()
300+
and integration.include_prompts
301+
):
302+
normalized_messages = normalize_message_roles(messages) # type: ignore
303+
scope = sentry_sdk.get_current_scope()
304+
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
305+
if messages_data is not None:
306+
set_data_normalized(
307+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
308+
)
309+
310+
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses")
311+
_commmon_set_input_data(span, kwargs)
312+
313+
314+
def _set_completions_api_input_data(
315+
span: "Span",
316+
kwargs: "dict[str, Any]",
317+
integration: "OpenAIIntegration",
318+
) -> None:
319+
messages: "Optional[Union[str, Iterable[ChatCompletionMessageParam]]]" = kwargs.get(
320+
"messages"
321+
)
322+
323+
if (
324+
not should_send_default_pii()
325+
or not integration.include_prompts
326+
or messages is None
327+
):
328+
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
329+
_commmon_set_input_data(span, kwargs)
330+
return
331+
332+
system_instructions = _get_system_instructions_completions(messages)
333+
if len(system_instructions) > 0:
334+
set_data_normalized(
335+
span,
336+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
337+
_transform_system_instructions(system_instructions),
338+
unpack=False,
339+
)
340+
341+
if isinstance(messages, str):
342+
normalized_messages = normalize_message_roles([messages]) # type: ignore
343+
scope = sentry_sdk.get_current_scope()
344+
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
345+
if messages_data is not None:
346+
set_data_normalized(
347+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
348+
)
349+
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
350+
_commmon_set_input_data(span, kwargs)
351+
return
352+
353+
non_system_messages = [
354+
message
355+
for message in messages
356+
if not _is_system_instruction_completions(message)
357+
]
358+
if len(non_system_messages) > 0:
359+
normalized_messages = normalize_message_roles(non_system_messages) # type: ignore
360+
scope = sentry_sdk.get_current_scope()
361+
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
362+
if messages_data is not None:
363+
set_data_normalized(
364+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
365+
)
366+
367+
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
368+
_commmon_set_input_data(span, kwargs)
369+
370+
371+
def _set_embeddings_input_data(
372+
span: "Span",
373+
kwargs: "dict[str, Any]",
374+
integration: "OpenAIIntegration",
375+
) -> None:
376+
messages = _get_input_messages(kwargs)
377+
378+
if (
379+
messages is not None
380+
and len(messages) > 0 # type: ignore
381+
and should_send_default_pii()
382+
and integration.include_prompts
383+
):
384+
normalized_messages = normalize_message_roles(messages) # type: ignore
385+
scope = sentry_sdk.get_current_scope()
386+
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
387+
if messages_data is not None:
388+
set_data_normalized(
389+
span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False
390+
)
391+
392+
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings")
393+
_commmon_set_input_data(span, kwargs)
394+
395+
248396
def _set_output_data(
249397
span: "Span",
250398
response: "Any",
@@ -476,16 +624,15 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
476624
return f(*args, **kwargs)
477625

478626
model = kwargs.get("model")
479-
operation = "chat"
480627

481628
span = sentry_sdk.start_span(
482629
op=consts.OP.GEN_AI_CHAT,
483-
name=f"{operation} {model}",
630+
name=f"chat {model}",
484631
origin=OpenAIIntegration.origin,
485632
)
486633
span.__enter__()
487634

488-
_set_input_data(span, kwargs, operation, integration)
635+
_set_completions_api_input_data(span, kwargs, integration)
489636

490637
start_time = time.perf_counter()
491638
response = yield f, args, kwargs
@@ -569,14 +716,13 @@ def _new_embeddings_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
569716
return f(*args, **kwargs)
570717

571718
model = kwargs.get("model")
572-
operation = "embeddings"
573719

574720
with sentry_sdk.start_span(
575721
op=consts.OP.GEN_AI_EMBEDDINGS,
576-
name=f"{operation} {model}",
722+
name=f"embeddings {model}",
577723
origin=OpenAIIntegration.origin,
578724
) as span:
579-
_set_input_data(span, kwargs, operation, integration)
725+
_set_embeddings_input_data(span, kwargs, integration)
580726

581727
response = yield f, args, kwargs
582728

@@ -657,16 +803,15 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
657803
return f(*args, **kwargs)
658804

659805
model = kwargs.get("model")
660-
operation = "responses"
661806

662807
span = sentry_sdk.start_span(
663808
op=consts.OP.GEN_AI_RESPONSES,
664-
name=f"{operation} {model}",
809+
name=f"responses {model}",
665810
origin=OpenAIIntegration.origin,
666811
)
667812
span.__enter__()
668813

669-
_set_input_data(span, kwargs, operation, integration)
814+
_set_responses_api_input_data(span, kwargs, integration)
670815

671816
start_time = time.perf_counter()
672817
response = yield f, args, kwargs

0 commit comments

Comments (0)