Test structured output capability via chat completions endpoint.

Each test case contains:
  - response_format: OpenAI-compatible response_format specification.
                     Both "json_schema" and "json_object" are accepted; with
                     "json_object" a schema can be supplied via extra_body.
  - extra_body (optional): dict of extra top-level request fields merged into
                           the request payload (mirrors the OpenAI SDK's extra_body
                           feature; llama.cpp reads a top-level "json_schema" here).
  - messages: initial conversation messages
  - tools (optional): tool definitions (for mixed tool + structured tests)
  - mock_tool_responses (optional): dict mapping tool_name -> callable(arguments) -> str (JSON)
@@ -81,11 +85,14 @@ def print_info(msg):
8185 _print (f"{ DIM } { msg } { RESET } " )
8286
8387
def print_schema_note(label, rf, extra_body=None):
    """Print a one-line note describing the response_format in effect.

    Args:
        label: Text shown in square brackets after "response_format".
        rf: The response_format dict; its "type" value is printed
            ("?" when the key is absent).
        extra_body: Optional dict of extra request fields. Consulted only
            when the type is "json_object", to look for a "json_schema" key.

    The text appended after the type is:
        - type "json_schema": the nested json_schema "name", if any;
        - type "json_object" with "json_schema" present in extra_body: that
          schema's "title", or the literal "extra_body.json_schema" when the
          title is missing or empty;
        - anything else: nothing.
    """
    kind = rf.get("type", "?")
    name = ""
    if kind == "json_schema":
        # `or {}` tolerates an explicit None value, the same guard the
        # json_object branch below applies to its schema.
        name = (rf.get("json_schema") or {}).get("name", "")
    elif kind == "json_object" and extra_body and "json_schema" in extra_body:
        extra_schema = extra_body["json_schema"] or {}
        name = extra_schema.get("title") or "extra_body.json_schema"
    # Formatting through an f-string (rather than `' / ' + name`) keeps a
    # non-string name/title from raising TypeError in a purely cosmetic print.
    suffix = f" / {name}" if name else ""
    _print(f"{DIM}{MAGENTA}⟐ response_format [{label}]: {kind}{suffix}{RESET}")
9198
@@ -95,17 +102,20 @@ def print_schema_note(label, rf):
95102# ---------------------------------------------------------------------------
96103
97104
98- def chat_completion (url , messages , tools = None , response_format = None , stream = False ):
105+ def chat_completion (url , messages , tools = None , response_format = None , stream = False ,
106+ extra_body = None ):
99107 payload = {
100108 "messages" : messages ,
101109 "stream" : stream ,
102- "max_tokens" : 4096 ,
110+ "max_tokens" : 8192 ,
103111 }
104112 if tools :
105113 payload ["tools" ] = tools
106114 payload ["tool_choice" ] = "auto"
107115 if response_format is not None :
108116 payload ["response_format" ] = response_format
117+ if extra_body :
118+ payload .update (extra_body )
109119
110120 try :
111121 response = requests .post (url , json = payload , stream = stream )
@@ -180,7 +190,7 @@ def chat_completion(url, messages, tools=None, response_format=None, stream=Fals
180190
181191def run_tool_loop (
182192 url , messages , tools , mock_tool_responses , stream , response_format = None ,
183- max_turns = 6 ,
193+ extra_body = None , max_turns = 6 ,
184194):
185195 """
186196 Drive the tool-call loop. If response_format is provided it is applied to
@@ -191,7 +201,8 @@ def run_tool_loop(
191201
192202 for _ in range (max_turns ):
193203 result = chat_completion (
194- url , msgs , tools = tools , response_format = response_format , stream = stream
204+ url , msgs , tools = tools , response_format = response_format , stream = stream ,
205+ extra_body = extra_body ,
195206 )
196207 if result is None :
197208 return all_tool_calls , msgs , None
@@ -274,7 +285,8 @@ def run_test(url, test_case, stream):
274285 print_header (f"{ name } [{ mode } ] ({ apply_stage } )" )
275286
276287 response_format = test_case ["response_format" ]
277- print_schema_note (apply_stage , response_format )
288+ extra_body = test_case .get ("extra_body" )
289+ print_schema_note (apply_stage , response_format , extra_body )
278290
279291 tools = test_case .get ("tools" )
280292 mocks = test_case .get ("mock_tool_responses" ) or {}
@@ -290,6 +302,7 @@ def run_test(url, test_case, stream):
290302 mock_tool_responses = mocks ,
291303 stream = stream ,
292304 response_format = response_format ,
305+ extra_body = extra_body ,
293306 )
294307 elif apply_stage == "after_tools" :
295308 # Phase 1: plain tool loop, no response_format applied yet.
@@ -314,7 +327,8 @@ def run_test(url, test_case, stream):
314327 # model focuses on producing the schema-constrained answer.
315328 _print (f"\n { DIM } { MAGENTA } ⟐ follow-up turn with response_format applied{ RESET } " )
316329 result = chat_completion (
317- url , msgs , tools = None , response_format = response_format , stream = stream
330+ url , msgs , tools = None , response_format = response_format , stream = stream ,
331+ extra_body = extra_body ,
318332 )
319333 final_content = result ["content" ] if result else None
320334 else :
@@ -481,6 +495,51 @@ def _validate_sentiment(parsed):
481495 return True , f"sentiment={ parsed ['sentiment' ]} conf={ conf } kws={ kws } "
482496
483497
# ---- Test: json_object + extra_body.json_schema (always) ----
#
# Exercises the llama.cpp-specific path where the OpenAI SDK would send
# response_format={"type": "json_object"} and tunnel the schema through
# extra_body.json_schema (which becomes a top-level "json_schema" field on
# the request body).

# The schema carries only metadata keys ("$schema", "$id", "title",
# "description") plus "type": "object"; it declares no properties, so any
# JSON object satisfies it.
_PRODUCT_JSON_OBJECT_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://example.com/product.schema.json",
    "title": "Product",
    "description": "A product in the catalog",
    "type": "object",
}

PRODUCT_JSON_OBJECT_TEST_CASE = {
    "name": "json_object response_format with extra_body json_schema",
    "response_format": {"type": "json_object"},
    # The schema travels outside response_format, under extra_body.
    "extra_body": {"json_schema": _PRODUCT_JSON_OBJECT_SCHEMA},
    "apply_stage": "always",
    "messages": [
        {
            "role": "system",
            "content": (
                "Extract structured data from the provided text according to the "
                "JSON schema. Return only valid JSON matching the schema exactly."
            ),
        },
        {
            "role": "user",
            "content": "Product: Wireless Headphones, ID: 101, In Stock: Yes",
        },
    ],
    # Only the parsed value is forwarded; the validator name is resolved at
    # call time, so it may be defined further down in the module.
    "validate": lambda parsed, tcs, raw: _validate_product_json_object(parsed),
}
533+
534+
def _validate_product_json_object(parsed):
    """Check that *parsed* is a non-empty JSON object (a dict).

    Args:
        parsed: The decoded model output to check.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is False when *parsed* is not a
        dict or is an empty dict; otherwise True, with a message listing the
        field count and the sorted keys.
    """
    if not isinstance(parsed, dict):
        return False, f"expected JSON object, got {type(parsed).__name__}: {parsed!r}"
    if not parsed:
        return False, f"expected non-empty object, got {parsed!r}"
    # Iterating a dict yields its keys, so sorted(parsed) is the sorted key list.
    return True, f"product object with {len(parsed)} field(s): {sorted(parsed)}"
541+
542+
484543# ---- Test 3: Nested recipe schema (always) ----
485544
486545_RECIPE_SCHEMA = {
@@ -915,6 +974,7 @@ def _validate_country_report(parsed, tcs):
915974ALL_TEST_CASES = [
916975 BOOK_TEST_CASE ,
917976 SENTIMENT_TEST_CASE ,
977+ PRODUCT_JSON_OBJECT_TEST_CASE ,
918978 RECIPE_TEST_CASE ,
919979 SHOP_COMPARISON_TEST_CASE ,
920980 COUNTRY_REPORT_TEST_CASE ,
0 commit comments