Skip to content

Commit 0adede8

Browse files
authored
parser: fix structured output bug (ggml-org#22302)
* fix very stupid structured output bug * Things just cannot be too easy.
1 parent 361fe72 commit 0adede8

2 files changed

Lines changed: 72 additions & 10 deletions

File tree

scripts/server-test-structured.py

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@
33
Test structured output capability via chat completions endpoint.
44
55
Each test case contains:
6-
- response_format: OpenAI-compatible response_format specification
7-
(json_schema only — llama.cpp does not support json_object)
6+
- response_format: OpenAI-compatible response_format specification.
7+
Both "json_schema" and "json_object" are accepted; with
8+
"json_object" a schema can be supplied via extra_body.
9+
- extra_body (optional): dict of extra top-level request fields merged into
10+
the request payload (mirrors the OpenAI SDK's extra_body
11+
feature; llama.cpp reads a top-level "json_schema" here).
812
- messages: initial conversation messages
913
- tools (optional): tool definitions (for mixed tool + structured tests)
1014
- mock_tool_responses (optional): dict mapping tool_name -> callable(arguments) -> str (JSON)
@@ -81,11 +85,14 @@ def print_info(msg):
8185
_print(f"{DIM}{msg}{RESET}")
8286

8387

84-
def print_schema_note(label, rf):
88+
def print_schema_note(label, rf, extra_body=None):
8589
kind = rf.get("type", "?")
8690
name = ""
8791
if kind == "json_schema":
8892
name = rf.get("json_schema", {}).get("name", "")
93+
elif kind == "json_object" and extra_body and "json_schema" in extra_body:
94+
extra_schema = extra_body["json_schema"] or {}
95+
name = extra_schema.get("title") or "extra_body.json_schema"
8996
_print(f"{DIM}{MAGENTA} ⟐ response_format [{label}]: {kind}"
9097
f"{(' / ' + name) if name else ''}{RESET}")
9198

@@ -95,17 +102,20 @@ def print_schema_note(label, rf):
95102
# ---------------------------------------------------------------------------
96103

97104

98-
def chat_completion(url, messages, tools=None, response_format=None, stream=False):
105+
def chat_completion(url, messages, tools=None, response_format=None, stream=False,
106+
extra_body=None):
99107
payload = {
100108
"messages": messages,
101109
"stream": stream,
102-
"max_tokens": 4096,
110+
"max_tokens": 8192,
103111
}
104112
if tools:
105113
payload["tools"] = tools
106114
payload["tool_choice"] = "auto"
107115
if response_format is not None:
108116
payload["response_format"] = response_format
117+
if extra_body:
118+
payload.update(extra_body)
109119

110120
try:
111121
response = requests.post(url, json=payload, stream=stream)
@@ -180,7 +190,7 @@ def chat_completion(url, messages, tools=None, response_format=None, stream=Fals
180190

181191
def run_tool_loop(
182192
url, messages, tools, mock_tool_responses, stream, response_format=None,
183-
max_turns=6,
193+
extra_body=None, max_turns=6,
184194
):
185195
"""
186196
Drive the tool-call loop. If response_format is provided it is applied to
@@ -191,7 +201,8 @@ def run_tool_loop(
191201

192202
for _ in range(max_turns):
193203
result = chat_completion(
194-
url, msgs, tools=tools, response_format=response_format, stream=stream
204+
url, msgs, tools=tools, response_format=response_format, stream=stream,
205+
extra_body=extra_body,
195206
)
196207
if result is None:
197208
return all_tool_calls, msgs, None
@@ -274,7 +285,8 @@ def run_test(url, test_case, stream):
274285
print_header(f"{name} [{mode}] ({apply_stage})")
275286

276287
response_format = test_case["response_format"]
277-
print_schema_note(apply_stage, response_format)
288+
extra_body = test_case.get("extra_body")
289+
print_schema_note(apply_stage, response_format, extra_body)
278290

279291
tools = test_case.get("tools")
280292
mocks = test_case.get("mock_tool_responses") or {}
@@ -290,6 +302,7 @@ def run_test(url, test_case, stream):
290302
mock_tool_responses=mocks,
291303
stream=stream,
292304
response_format=response_format,
305+
extra_body=extra_body,
293306
)
294307
elif apply_stage == "after_tools":
295308
# Phase 1: plain tool loop, no response_format applied yet.
@@ -314,7 +327,8 @@ def run_test(url, test_case, stream):
314327
# model focuses on producing the schema-constrained answer.
315328
_print(f"\n{DIM}{MAGENTA} ⟐ follow-up turn with response_format applied{RESET}")
316329
result = chat_completion(
317-
url, msgs, tools=None, response_format=response_format, stream=stream
330+
url, msgs, tools=None, response_format=response_format, stream=stream,
331+
extra_body=extra_body,
318332
)
319333
final_content = result["content"] if result else None
320334
else:
@@ -481,6 +495,51 @@ def _validate_sentiment(parsed):
481495
return True, f"sentiment={parsed['sentiment']} conf={conf} kws={kws}"
482496

483497

498+
# ---- Test: json_object + extra_body.json_schema (always) ----
499+
#
500+
# Exercises the llama.cpp-specific path where the OpenAI SDK would send
501+
# response_format={"type": "json_object"} and tunnel the schema through
502+
# extra_body.json_schema (which becomes a top-level "json_schema" field on
503+
# the request body).
504+
505+
_PRODUCT_JSON_OBJECT_SCHEMA = {
506+
"$schema": "https://json-schema.org/draft/2020-12/schema",
507+
"$id": "https://example.com/product.schema.json",
508+
"title": "Product",
509+
"description": "A product in the catalog",
510+
"type": "object",
511+
}
512+
513+
PRODUCT_JSON_OBJECT_TEST_CASE = {
514+
"name": "json_object response_format with extra_body json_schema",
515+
"response_format": {"type": "json_object"},
516+
"extra_body": {"json_schema": _PRODUCT_JSON_OBJECT_SCHEMA},
517+
"apply_stage": "always",
518+
"messages": [
519+
{
520+
"role": "system",
521+
"content": (
522+
"Extract structured data from the provided text according to the "
523+
"JSON schema. Return only valid JSON matching the schema exactly."
524+
),
525+
},
526+
{
527+
"role": "user",
528+
"content": "Product: Wireless Headphones, ID: 101, In Stock: Yes",
529+
},
530+
],
531+
"validate": lambda parsed, tcs, raw: _validate_product_json_object(parsed),
532+
}
533+
534+
535+
def _validate_product_json_object(parsed):
536+
if not isinstance(parsed, dict):
537+
return False, f"expected JSON object, got {type(parsed).__name__}: {parsed!r}"
538+
if not parsed:
539+
return False, f"expected non-empty object, got {parsed!r}"
540+
return True, f"product object with {len(parsed)} field(s): {sorted(parsed.keys())}"
541+
542+
484543
# ---- Test 3: Nested recipe schema (always) ----
485544

486545
_RECIPE_SCHEMA = {
@@ -915,6 +974,7 @@ def _validate_country_report(parsed, tcs):
915974
ALL_TEST_CASES = [
916975
BOOK_TEST_CASE,
917976
SENTIMENT_TEST_CASE,
977+
PRODUCT_JSON_OBJECT_TEST_CASE,
918978
RECIPE_TEST_CASE,
919979
SHOP_COMPARISON_TEST_CASE,
920980
COUNTRY_REPORT_TEST_CASE,

tools/server/server-common.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -947,7 +947,9 @@ json oaicompat_chat_params_parse(
947947
json response_format = json_value(body, "response_format", json::object());
948948
std::string response_type = json_value(response_format, "type", std::string());
949949
if (response_type == "json_object") {
950-
json_schema = json_value(response_format, "schema", json::object());
950+
if (response_format.contains("schema") || json_schema.empty()) {
951+
json_schema = json_value(response_format, "schema", json::object());
952+
}
951953
} else if (response_type == "json_schema") {
952954
auto schema_wrapper = json_value(response_format, "json_schema", json::object());
953955
json_schema = json_value(schema_wrapper, "schema", json::object());

0 commit comments

Comments (0)