Skip to content

Commit 1fdfc0f

Browse files
committed
[doc] add docstrings, fix ci build errors
1 parent e87bdbd commit 1fdfc0f

9 files changed

Lines changed: 627 additions & 124 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
- Copyright: (C) Qianqian Fang (2019-2026) <q.fang at neu.edu>
66
- License: Apache License, Version 2.0
7-
- Version: 0.9.4
7+
- Version: 0.9.3
88
- URL: https://github.com/NeuroJSON/pyjdata
99
- Acknowledgement: This project is supported by the US National Institutes of Health (NIH)
1010
grant [U24-NS124027](https://reporter.nih.gov/project-details/10308329)

jdata/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
from .njprep import *
6161
from .neurojson import *
6262

63-
__version__ = "0.9.4"
63+
__version__ = "0.9.3"
6464
__all__ = (
6565
jdata.__all__
6666
+ jfile.__all__

jdata/jdata.py

Lines changed: 224 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -86,17 +86,41 @@
8686

8787

8888
def encode(d, opt=None, **kwargs):
89-
"""@brief Encoding a Python data structure to portable JData-annotated dict constructs
90-
91-
This function converts complex data types (usually not JSON-serializable) into
92-
portable JData-annotated dict/list constructs that can be easily exported as JSON/JData
93-
files
94-
95-
@param[in,out] d: an arbitrary Python data
96-
@param[in] opt: options, can contain a dict with the following keys
97-
'compression': choose one of ['zlib','lzma','gzip','lz4','blosc2blosclz','blosc2lz4',
98-
'blosc2lz4hc','blosc2zlib','blosc2zstd'] for compression codec, default is None
99-
'nthread': number of compression thread of the codec is of the blosc2 class, default is 1
89+
"""
90+
Encode a Python data structure to portable JData-annotated dict constructs.
91+
92+
Converts complex data types (numpy arrays, complex numbers, special floats)
93+
into JData-annotated dict/list constructs that can be serialized as JSON or
94+
binary JSON. Scalar types (int, float, str, bool, None) pass through unchanged.
95+
96+
Args:
97+
d: An arbitrary Python data structure to encode. Supported types include
98+
float, int, str, bool, None, list, tuple, set, frozenset, dict,
99+
complex, numpy.ndarray, and nested combinations thereof.
100+
opt (dict, optional): Legacy options dict. If provided, its contents are
101+
merged into kwargs. Prefer passing options as keyword arguments directly.
102+
**kwargs: Encoding options including:
103+
compression (str): Compression codec for numpy arrays. One of 'zlib',
104+
'gzip', 'lzma', 'lz4', 'base64', or blosc2 variants. Default 'zlib'.
105+
compressarraysize (int): Minimum array size (in elements) to trigger
106+
compression. Default 200.
107+
base64 (bool): If True, base64-encode compressed data for text JSON.
108+
inplace (bool): If True, make deep copies to avoid mutating input.
109+
110+
Returns:
111+
The JData-annotated version of the input. Numpy arrays become dicts with
112+
keys like '_ArrayType_', '_ArraySize_', '_ArrayData_' (or '_ArrayZipData_'
113+
if compressed). Special floats become '_NaN_', '_Inf_', '-_Inf_'. Other
114+
types pass through or are recursively encoded.
115+
116+
Examples:
117+
>>> import numpy as np
118+
>>> encode(float('nan'))
119+
'_NaN_'
120+
>>> encode(np.array([1, 2, 3], dtype=np.uint8))
121+
{'_ArrayType_': 'uint8', '_ArraySize_': [3], '_ArrayData_': ...}
122+
>>> encode({'a': [1, 2], 'b': np.zeros(3)})
123+
{'a': [1, 2], 'b': {'_ArrayType_': 'double', ...}}
100124
"""
101125
if opt is None:
102126
opt = {}
@@ -238,14 +262,30 @@ def encode(d, opt=None, **kwargs):
238262

239263

240264
def decode(d, opt=None, **kwargs):
241-
"""@brief Decoding a JData-annotated dict construct into native Python data
242-
243-
This function converts portable JData-annotated dict/list constructs back to native Python
244-
data structures
245-
246-
@param[in,out] d: an arbitrary Python data, any JData-encoded components will be decoded
247-
@param[in] opt: options, can contain a dict with the following keys
248-
'nthread': number of decompression thread of the codec is of the blosc2 class, default is 1
265+
"""
266+
Decode JData-annotated dict constructs back into native Python data.
267+
268+
Reverses the encoding performed by encode(). Recognizes JData annotation keys
269+
('_ArrayType_', '_ArraySize_', '_ArrayData_', '_ArrayZipData_', etc.) and
270+
reconstructs numpy arrays, complex numbers, and special float values.
271+
272+
Args:
273+
d: A JData-annotated Python data structure (dict, list, or scalar).
274+
opt (dict, optional): Legacy options dict merged into kwargs.
275+
**kwargs: Decoding options including:
276+
base64 (bool): If True, expect base64-encoded compressed data.
277+
inplace (bool): If True, make deep copies to avoid mutating input.
278+
maxlinklevel (int): Maximum depth for resolving '_DataLink_' references.
279+
280+
Returns:
281+
The decoded native Python data structure with numpy arrays, complex numbers,
282+
and special floats restored.
283+
284+
Examples:
285+
>>> decode('_NaN_')
286+
nan
287+
>>> decode({'_ArrayType_': 'uint8', '_ArraySize_': [3], '_ArrayData_': [1, 2, 3]})
288+
array([1, 2, 3], dtype=uint8)
249289
"""
250290

251291
if opt is None:
@@ -403,6 +443,20 @@ def decode(d, opt=None, **kwargs):
403443

404444

405445
def jsonfilter(obj):
446+
"""
447+
JSON serialization fallback handler for non-serializable Python types.
448+
449+
Intended for use as the 'default' parameter of json.dumps(). Converts numpy
450+
types, bytes, and special floats to JSON-compatible representations.
451+
452+
Args:
453+
obj: A Python object that is not natively JSON-serializable.
454+
455+
Returns:
456+
A JSON-serializable equivalent: numpy arrays become lists, numpy scalars
457+
become Python scalars, bytes become UTF-8 strings, NaN/Inf become JData
458+
string annotations. Returns None if the type is not handled.
459+
"""
406460
if type(obj) == "long":
407461
return str(obj)
408462
elif type(obj).__module__ == np.__name__:
@@ -423,6 +477,19 @@ def jsonfilter(obj):
423477

424478

425479
def encodedict(d0, **kwargs):
480+
"""
481+
Encode all values in a dict using JData annotations.
482+
483+
Iterates over key-value pairs and recursively calls encode() on each.
484+
Keys that are not strings are also encoded.
485+
486+
Args:
487+
d0 (dict): The input dictionary to encode.
488+
**kwargs: Options passed through to encode() for each value.
489+
490+
Returns:
491+
dict: A new dictionary with all values JData-encoded.
492+
"""
426493
d = dict(d0)
427494
for k, v in d0.items():
428495
if isinstance(v, np.ndarray) and isinstance(k, str) and (k in _allownumpy):
@@ -438,6 +505,19 @@ def encodedict(d0, **kwargs):
438505

439506

440507
def encodelist(d0, **kwargs):
508+
"""
509+
Encode all elements in a list using JData annotations.
510+
511+
Iterates over list elements and recursively calls encode() on each.
512+
513+
Args:
514+
d0 (list): The input list to encode.
515+
**kwargs: Options passed through to encode() for each element.
516+
inplace (bool): If True, deep-copy elements before encoding.
517+
518+
Returns:
519+
list: A new list with all elements JData-encoded.
520+
"""
441521
if kwargs.get("inplace", False):
442522
d = [copy.deepcopy(x) if not isinstance(x, np.ndarray) else x for x in d0]
443523
else:
@@ -451,6 +531,18 @@ def encodelist(d0, **kwargs):
451531

452532

453533
def decodedict(d0, **kwargs):
534+
"""
535+
Decode all values in a JData-annotated dict back to native types.
536+
537+
Iterates over key-value pairs and recursively calls decode() on each value.
538+
539+
Args:
540+
d0 (dict): The input JData-annotated dictionary.
541+
**kwargs: Options passed through to decode() for each value.
542+
543+
Returns:
544+
dict: A new dictionary with all values decoded to native Python types.
545+
"""
454546
d = dict(d0)
455547
for k, v in d.items():
456548
newkey = encode(k, **kwargs)
@@ -464,6 +556,19 @@ def decodedict(d0, **kwargs):
464556

465557

466558
def decodelist(d0, **kwargs):
559+
"""
560+
Decode all elements in a JData-annotated list back to native types.
561+
562+
Iterates over list elements and recursively calls decode() on each.
563+
564+
Args:
565+
d0 (list): The input JData-annotated list.
566+
**kwargs: Options passed through to decode() for each element.
567+
inplace (bool): If True, deep-copy elements before decoding.
568+
569+
Returns:
570+
list: A new list with all elements decoded to native Python types.
571+
"""
467572
if kwargs.get("inplace", False):
468573
d = [copy.deepcopy(x) if not isinstance(x, np.ndarray) else x for x in d0]
469574
else:
@@ -477,13 +582,31 @@ def decodelist(d0, **kwargs):
477582

478583

479584
def zlibencode(buf):
585+
"""
586+
Compress a bytes buffer using zlib.
587+
588+
Args:
589+
buf (bytes): Raw byte data to compress.
590+
591+
Returns:
592+
bytes: Zlib-compressed data.
593+
"""
480594
return zlib.compress(buf)
481595

482596

483597
# -------------------------------------------------------------------------------------
484598

485599

486600
def gzipencode(buf):
601+
"""
602+
Compress a bytes buffer using gzip format.
603+
604+
Args:
605+
buf (bytes): Raw byte data to compress.
606+
607+
Returns:
608+
bytes: Gzip-compressed data.
609+
"""
487610
gzipper = zlib.compressobj(wbits=(zlib.MAX_WBITS | 16))
488611
newbuf = gzipper.compress(buf)
489612
newbuf += gzipper.flush()
@@ -494,13 +617,36 @@ def gzipencode(buf):
494617

495618

496619
def lzmaencode(buf):
620+
"""
621+
Compress a bytes buffer using LZMA (FORMAT_ALONE).
622+
623+
Args:
624+
buf (bytes): Raw byte data to compress.
625+
626+
Returns:
627+
bytes: LZMA-compressed data.
628+
"""
497629
return lzma.compress(buf, lzma.FORMAT_ALONE)
498630

499631

500632
# -------------------------------------------------------------------------------------
501633

502634

503635
def lz4encode(buf):
636+
"""
637+
Compress a bytes buffer using LZ4 frame format.
638+
639+
Requires the 'lz4' package to be installed.
640+
641+
Args:
642+
buf (bytes): Raw byte data to compress.
643+
644+
Returns:
645+
bytes: LZ4-compressed data.
646+
647+
Raises:
648+
ImportError: If the lz4 module is not installed.
649+
"""
504650
try:
505651
import lz4.frame
506652
except ImportError:
@@ -515,27 +661,63 @@ def lz4encode(buf):
515661

516662

517663
def base64encode(buf):
664+
"""
665+
Encode a bytes buffer to base64.
666+
667+
Args:
668+
buf (bytes): Raw byte data to encode.
669+
670+
Returns:
671+
bytes: Base64-encoded data.
672+
"""
518673
return base64.b64encode(buf)
519674

520675

521676
# -------------------------------------------------------------------------------------
522677

523678

524679
def zlibdecode(buf):
680+
"""
681+
Decompress a zlib-compressed bytes buffer.
682+
683+
Args:
684+
buf (bytes): Zlib-compressed byte data.
685+
686+
Returns:
687+
bytes: Decompressed raw data.
688+
"""
525689
return zlib.decompress(buf)
526690

527691

528692
# -------------------------------------------------------------------------------------
529693

530694

531695
def gzipdecode(buf):
696+
"""
697+
Decompress a gzip-compressed bytes buffer.
698+
699+
Args:
700+
buf (bytes): Gzip-compressed byte data.
701+
702+
Returns:
703+
bytes: Decompressed raw data.
704+
"""
532705
return zlib.decompress(bytes(buf), zlib.MAX_WBITS | 32)
533706

534707

535708
# -------------------------------------------------------------------------------------
536709

537710

538711
def lzmadecode(buf):
712+
"""
713+
Decompress an LZMA-compressed bytes buffer.
714+
715+
Args:
716+
buf (bytes): LZMA-compressed byte data.
717+
718+
Returns:
719+
bytes: Decompressed raw data.
720+
"""
539721
newbuf = bytearray(buf) # set length to -1 (unknown) if EOF appears
540722
newbuf[5:13] = b"\xff\xff\xff\xff\xff\xff\xff\xff"
541723
return lzma.decompress(newbuf, lzma.FORMAT_ALONE)
@@ -545,6 +727,20 @@ def lzmadecode(buf):
545727

546728

547729
def lz4decode(buf):
730+
"""
731+
Decompress an LZ4-compressed bytes buffer.
732+
733+
Requires the 'lz4' package to be installed.
734+
735+
Args:
736+
buf (bytes): LZ4-compressed byte data.
737+
738+
Returns:
739+
bytes: Decompressed raw data.
740+
741+
Raises:
742+
ImportError: If the lz4 module is not installed.
743+
"""
548744
try:
549745
import lz4.frame
550746
except ImportError:
@@ -559,6 +755,15 @@ def lz4decode(buf):
559755

560756

561757
def base64decode(buf):
758+
"""
759+
Decode a base64-encoded bytes buffer.
760+
761+
Args:
762+
buf (bytes): Base64-encoded byte data.
763+
764+
Returns:
765+
bytes: Decoded raw data.
766+
"""
562767
return base64.b64decode(buf)
563768

564769

0 commit comments

Comments
 (0)