77
88from __future__ import annotations
99
10+ import builtins
11+ import inspect
12+ import linecache
13+ import textwrap
14+ from dataclasses import asdict
15+
16+ import numpy as np
1017from msgpack import ExtType , packb , unpackb
1118
1219from blosc2 import blosc2_ext
20+ from blosc2 .dsl_kernel import DSLKernel
1321
1422# Msgpack extension type codes are application-defined. Reserve code 42 in
1523# python-blosc2 for values serialized as Blosc2 CFrames via ``to_cframe()`` and
2129# stable ``kind`` and ``version`` envelope.
2230_BLOSC2_STRUCTURED_EXT_CODE = 43
2331_BLOSC2_STRUCTURED_VERSION = 1
32+ _BLOSC2_DSL_VERSION = 1
2433
2534
2635def _encode_operand_reference (obj ):
@@ -74,6 +83,43 @@ def _encode_structured_reference(obj):
7483 "operands" : {key : _encode_operand_reference (value ) for key , value in operands .items ()},
7584 }
7685 return ExtType (_BLOSC2_STRUCTURED_EXT_CODE , packb (payload , use_bin_type = True ))
86+ if isinstance (obj , blosc2 .LazyUDF ):
87+ if not isinstance (obj .func , DSLKernel ):
88+ raise TypeError ("Structured Blosc2 msgpack payload only supports LazyUDF backed by DSLKernel" )
89+ udf_func = obj .func .func
90+ udf_name = getattr (udf_func , "__name__" , obj .func .__name__ )
91+ try :
92+ udf_source = textwrap .dedent (inspect .getsource (udf_func )).lstrip ()
93+ except Exception :
94+ udf_source = obj .func .dsl_source
95+ if udf_source is None :
96+ raise ValueError ("Structured LazyUDF msgpack payload requires recoverable DSL kernel source" )
97+ kwargs = {}
98+ for key , value in obj .kwargs .items ():
99+ if key in {"dtype" , "shape" }:
100+ continue
101+ if isinstance (value , blosc2 .CParams | blosc2 .DParams ):
102+ kwargs [key ] = asdict (value )
103+ else :
104+ kwargs [key ] = value
105+ # Keep both source forms:
106+ # - udf_source recreates the executable Python function object
107+ # - dsl_source preserves the DSLKernel's normalized DSL metadata so the
108+ # reconstructed function can keep its DSL identity and fast-path hints
109+ payload = {
110+ "kind" : "lazyudf" ,
111+ "version" : _BLOSC2_STRUCTURED_VERSION ,
112+ "function_kind" : "dsl" ,
113+ "dsl_version" : _BLOSC2_DSL_VERSION ,
114+ "name" : udf_name ,
115+ "udf_source" : udf_source ,
116+ "dsl_source" : obj .func .dsl_source ,
117+ "dtype" : np .dtype (obj .dtype ).str ,
118+ "shape" : list (obj .shape ),
119+ "operands" : {f"o{ i } " : _encode_operand_reference (value ) for i , value in enumerate (obj .inputs )},
120+ "kwargs" : kwargs ,
121+ }
122+ return ExtType (_BLOSC2_STRUCTURED_EXT_CODE , packb (payload , use_bin_type = True ))
77123 return None
78124
79125
@@ -113,8 +159,6 @@ def _decode_operand_reference(payload):
113159
114160
115161def _decode_structured_reference (data ):
116- import blosc2
117-
118162 payload = unpackb (data )
119163 if not isinstance (payload , dict ):
120164 raise TypeError ("Structured Blosc2 msgpack payload must decode to a mapping" )
@@ -127,29 +171,80 @@ def _decode_structured_reference(data):
127171 if kind == "c2array" :
128172 return _decode_operand_reference (payload )
129173 if kind == "lazyexpr" :
130- expression = payload .get ("expression" )
131- if not isinstance (expression , str ):
132- raise TypeError ("Structured LazyExpr msgpack payload requires a string 'expression'" )
133- operands_payload = payload .get ("operands" )
134- if not isinstance (operands_payload , dict ):
135- raise TypeError ("Structured LazyExpr msgpack payload requires a mapping 'operands'" )
136- operands = {key : _decode_operand_reference (value ) for key , value in operands_payload .items ()}
137- return blosc2 .lazyexpr (expression , operands = operands )
174+ return _decode_structured_lazyexpr (payload )
175+ if kind == "lazyudf" :
176+ return _decode_structured_lazyudf (payload )
138177 raise ValueError (f"Unsupported structured Blosc2 msgpack payload kind: { kind !r} " )
139178
140179
def _decode_structured_lazyexpr(payload):
    """Rebuild a lazy expression from a structured ``"lazyexpr"`` payload.

    ``payload`` is the decoded msgpack mapping. It must provide a string
    under ``"expression"`` and a mapping under ``"operands"``; every operand
    value is passed through ``_decode_operand_reference`` before the
    expression is rebuilt with ``blosc2.lazyexpr``.

    Raises ``TypeError`` when ``"expression"`` is not a string or when
    ``"operands"`` is not a mapping.
    """
    # Imported lazily inside the function, as the other decoders here do.
    import blosc2

    expr_text = payload.get("expression")
    if not isinstance(expr_text, str):
        raise TypeError("Structured LazyExpr msgpack payload requires a string 'expression'")

    raw_operands = payload.get("operands")
    if not isinstance(raw_operands, dict):
        raise TypeError("Structured LazyExpr msgpack payload requires a mapping 'operands'")

    # Decode operands one by one, preserving the payload's key order.
    decoded = {}
    for operand_name, operand_payload in raw_operands.items():
        decoded[operand_name] = _decode_operand_reference(operand_payload)

    return blosc2.lazyexpr(expr_text, operands=decoded)
191+
192+
def _decode_structured_lazyudf(payload):
    """Rebuild a lazy UDF from a structured ``"lazyudf"`` payload.

    The payload is the decoded msgpack mapping. The fields read here are:

    - ``function_kind``: must be ``"dsl"``.
    - ``dsl_version``: must equal ``_BLOSC2_DSL_VERSION``.
    - ``udf_source``: Python source text that defines the kernel.
    - ``name``: the name that ``udf_source`` binds the kernel to.
    - ``dsl_source``: optional; copied onto the kernel when the rebuilt
      kernel has no ``dsl_source`` of its own.
    - ``dtype`` / ``shape``: output dtype string and shape list.
    - ``operands``: mapping keyed ``"o0"``, ``"o1"``, ... in positional order.
    - ``kwargs``: optional mapping forwarded to ``blosc2.lazyudf``.

    Returns whatever ``blosc2.lazyudf`` returns for the rebuilt kernel.

    Raises ``TypeError`` when a field has the wrong type, and ``ValueError``
    when the function kind or DSL version is unsupported, when the operand
    keys are not exactly ``"o0"`` .. ``"o{n-1}"``, or when ``udf_source`` does
    not define ``name``.

    NOTE(security): ``udf_source`` is compiled and executed. Dropping
    ``__import__`` from the builtins is NOT a sandbox (the code still receives
    the ``np`` and ``blosc2`` modules and every other builtin), so this must
    only be used on payloads from a trusted origin.
    """
    import blosc2

    function_kind = payload.get("function_kind")
    if function_kind != "dsl":
        raise ValueError(f"Unsupported structured LazyUDF function kind: {function_kind!r}")
    dsl_version = payload.get("dsl_version")
    if dsl_version != _BLOSC2_DSL_VERSION:
        raise ValueError(f"Unsupported structured LazyUDF DSL version: {dsl_version!r}")
    udf_source = payload.get("udf_source")
    if not isinstance(udf_source, str):
        raise TypeError("Structured LazyUDF msgpack payload requires a string 'udf_source'")
    name = payload.get("name")
    if not isinstance(name, str):
        raise TypeError("Structured LazyUDF msgpack payload requires a string 'name'")
    dtype = payload.get("dtype")
    if not isinstance(dtype, str):
        raise TypeError("Structured LazyUDF msgpack payload requires a string 'dtype'")
    shape_payload = payload.get("shape")
    if not isinstance(shape_payload, list):
        raise TypeError("Structured LazyUDF msgpack payload requires a list 'shape'")
    operands_payload = payload.get("operands")
    if not isinstance(operands_payload, dict):
        raise TypeError("Structured LazyUDF msgpack payload requires a mapping 'operands'")
    kwargs = payload.get("kwargs", {})
    if not isinstance(kwargs, dict):
        raise TypeError("Structured LazyUDF msgpack payload requires a mapping 'kwargs'")

    # Check the operand keys before any payload code is executed, so that a
    # malformed payload is rejected with a descriptive error rather than a
    # bare KeyError after the source has already run.
    operand_keys = [f"o{n}" for n in range(len(operands_payload))]
    missing_keys = [key for key in operand_keys if key not in operands_payload]
    if missing_keys:
        raise ValueError(
            "Structured LazyUDF msgpack payload requires operands keyed 'o0'..'oN'; "
            f"missing {missing_keys!r}"
        )

    local_ns = {}
    filename = f"<{name}>"
    safe_globals = {
        "__builtins__": {k: v for k, v in builtins.__dict__.items() if k != "__import__"},
        "np": np,
        "blosc2": blosc2,
    }
    # Register the source under the synthetic filename so line lookups
    # (tracebacks, inspect.getsource) can resolve it. The ``None`` mtime makes
    # linecache.checkcache leave the entry alone.
    linecache.cache[filename] = (len(udf_source), None, udf_source.splitlines(True), filename)
    # See the security note in the docstring: this runs payload-supplied code.
    exec(compile(udf_source, filename, "exec"), safe_globals, local_ns)
    try:
        func = local_ns[name]
    except KeyError:
        raise ValueError(
            f"Structured LazyUDF msgpack payload 'udf_source' does not define {name!r}"
        ) from None
    # The executed source may already yield a DSLKernel; wrap it only if not.
    if not isinstance(func, DSLKernel):
        func = DSLKernel(func)
    dsl_source = payload.get("dsl_source")
    if dsl_source is not None and func.dsl_source is None:
        func.dsl_source = dsl_source

    operands = tuple(_decode_operand_reference(operands_payload[key]) for key in operand_keys)
    return blosc2.lazyudf(func, operands, dtype=np.dtype(dtype), shape=tuple(shape_payload), **kwargs)
241+
242+
141243def _encode_msgpack_ext (obj ):
142244 import blosc2
143245
144246 if isinstance (
145- obj ,
146- (
147- blosc2 .NDArray ,
148- blosc2 .SChunk ,
149- blosc2 .VLArray ,
150- blosc2 .BatchStore ,
151- blosc2 .EmbedStore ,
152- ),
247+ obj , blosc2 .NDArray | blosc2 .SChunk | blosc2 .VLArray | blosc2 .BatchStore | blosc2 .EmbedStore
153248 ):
154249 return ExtType (_BLOSC2_EXT_CODE , obj .to_cframe ())
155250 structured = _encode_structured_reference (obj )
0 commit comments