Skip to content
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
8c7d5c3
feat(python): add bfloat16 and bfloat16_array support
asadjan4611 Feb 12, 2026
c89d86e
style(python): fix code formatting for bfloat16 implementation
asadjan4611 Feb 12, 2026
b2bb7c6
fix(python): remove trailing newline in bfloat16_array.py to match co…
asadjan4611 Feb 12, 2026
6537f74
fix(python): resolve Cython compilation errors for bfloat16
asadjan4611 Feb 12, 2026
b6793a5
fix(python): use memcpy for safe type punning in bfloat16 conversion
asadjan4611 Feb 13, 2026
e387339
fix(python): use explicit size constant for ARM compatibility
asadjan4611 Feb 13, 2026
71241ba
fix(python): correct row schema Arrow conversion type ids
asadjan4611 Feb 13, 2026
8066ace
fix(python): build and export bfloat16 python module
asadjan4611 Feb 13, 2026
8ed6f57
docs(python): document bfloat16 support and stabilize pure mode seria…
asadjan4611 Feb 13, 2026
9fcb74d
fix(python): configure bazel shell for windows editable builds
asadjan4611 Feb 13, 2026
8866639
Merge branch 'main' into feat/python-bfloat16-support
asadjan4611 Feb 20, 2026
102acfb
fix(python): restore xlang serializer base after conflict merge
asadjan4611 Feb 20, 2026
bedb82e
fix(python): restore xlang serializer base in cython runtime
asadjan4611 Feb 21, 2026
2f2b6b4
fix(python): align bfloat16 serializers with unified API and typed bu…
asadjan4611 Feb 21, 2026
c9e2e12
fix(python): resolve bfloat16 cython build and serializer API drift
asadjan4611 Feb 21, 2026
56f45d3
style(python): align bfloat16 files with ci formatter
asadjan4611 Feb 21, 2026
efb0100
fix(python): include bfloat16 pxd in format cython target
asadjan4611 Feb 21, 2026
f1a3f42
fix(python): resolve bfloat16 cython redeclaration and static method …
asadjan4611 Feb 21, 2026
9d74ae2
fix(python): remove obsolete xlang arg for dataclass serializers
asadjan4611 Feb 21, 2026
ec59341
fix(python): accept legacy xlang kwarg in dataclass serializers
asadjan4611 Feb 21, 2026
848a1cd
fix(python): repair unsigned xlang dataclass refs and retry bazel fetch
asadjan4611 Feb 21, 2026
66dbf19
style(python): format setup retry log line
asadjan4611 Feb 21, 2026
3ac6ba8
fix(python): remove keyword arguments from cpdef function calls in co…
asadjan4611 Feb 21, 2026
308bde9
fix(python): remove keyword arguments from cpdef function calls in st…
asadjan4611 Feb 21, 2026
e3a1e63
fix(python): preserve bfloat16 NaN semantics, fix hash contract, expa…
asadjan4611 Mar 12, 2026
d5fa636
Merge apache/main into feat/python-bfloat16-support
asadjan4611 Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ pyx_library(
],
)

# Cython extension target for python/pyfory/bfloat16.pyx.
# The package __init__.py is listed alongside the .pyx source, and the
# extension is linked statically (linkstatic = 1).
pyx_library(
name = "bfloat16",
srcs = glob([
"python/pyfory/bfloat16.pyx",
"python/pyfory/__init__.py",
]),
cc_kwargs = dict(
linkstatic = 1,
),
)

pyx_library(
name = "_format",
srcs = glob(
Expand All @@ -96,6 +107,7 @@ genrule(
name = "cp_fory_so",
srcs = [
":python/pyfory/buffer.so",
":python/pyfory/bfloat16.so",
":python/pyfory/lib/mmh3/mmh3.so",
":python/pyfory/format/_format.so",
":python/pyfory/serialization.so",
Expand All @@ -111,11 +123,13 @@ genrule(
if [ "$${u_name: 0: 4}" == "MING" ] || [ "$${u_name: 0: 4}" == "MSYS" ]
then
cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory/buffer.pyd"
cp -f $(location python/pyfory/bfloat16.so) "$$WORK_DIR/python/pyfory/bfloat16.pyd"
cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd"
cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd"
cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd"
else
cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory"
cp -f $(location python/pyfory/bfloat16.so) "$$WORK_DIR/python/pyfory"
cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3"
cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format"
cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory"
Expand Down
28 changes: 28 additions & 0 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,34 @@ fory.register(Person.class, "example.Person");
Person person = (Person) fory.deserialize(binaryData);
```

### BFloat16 Support

`pyfory` supports `bfloat16` scalar values and `bfloat16` arrays in xlang mode:

- Scalar type: `pyfory.BFloat16` (type id `18`)
- Array type: `pyfory.BFloat16Array` (type id `54`)

```python
import pyfory
from pyfory import BFloat16, BFloat16Array

fory = pyfory.Fory(xlang=True, ref=False, strict=True)

# Scalar bfloat16
v = BFloat16(3.1415926)
data = fory.serialize(v)
out = fory.deserialize(data)
print(float(out))

# bfloat16 array
arr = BFloat16Array([1.0, 2.5, -3.25])
data = fory.serialize(arr)
out = fory.deserialize(data)
print(out)
```

`BFloat16Array` stores values in a packed `array('H')` representation and writes bytes in little-endian order for cross-language compatibility.

## 📊 Row Format - Zero-Copy Processing

Apache Fory™ provides a random-access row format that enables reading nested fields from binary data without full deserialization. This drastically reduces overhead when working with large objects where only partial data access is needed. The format also supports memory-mapped files for ultra-low memory footprint.
Expand Down
16 changes: 16 additions & 0 deletions python/pyfory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
TaggedUint64Serializer,
Float32Serializer,
Float64Serializer,
BFloat16Serializer,
StringSerializer,
DateSerializer,
TimestampSerializer,
Expand Down Expand Up @@ -88,6 +89,8 @@
tagged_uint64,
float32,
float64,
bfloat16,
bfloat16_array,
int8_array,
uint8_array,
int16_array,
Expand Down Expand Up @@ -118,6 +121,14 @@
from pyfory.policy import DeserializationPolicy # noqa: F401 # pylint: disable=unused-import
from pyfory.buffer import Buffer # noqa: F401 # pylint: disable=unused-import

# BFloat16 support
try:
from pyfory.bfloat16 import BFloat16 # noqa: F401
from pyfory.bfloat16_array import BFloat16Array # noqa: F401
except ImportError:
# Cython extensions not built yet
pass

__version__ = "0.16.0.dev0"

__all__ = [
Expand Down Expand Up @@ -151,6 +162,10 @@
"tagged_uint64",
"float32",
"float64",
"BFloat16",
"BFloat16Array",
"bfloat16",
"bfloat16_array",
"int8_array",
"uint8_array",
"int16_array",
Expand Down Expand Up @@ -192,6 +207,7 @@
"TaggedUint64Serializer",
"Float32Serializer",
"Float64Serializer",
"BFloat16Serializer",
"StringSerializer",
"DateSerializer",
"TimestampSerializer",
Expand Down
26 changes: 26 additions & 0 deletions python/pyfory/_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ def support_subclass(cls) -> bool:
return False


class XlangCompatibleSerializer(Serializer):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this, we've removed it in #3348

# Forwards ``fory`` and ``type_`` unchanged to the base ``Serializer`` constructor.
def __init__(self, fory, type_):
super().__init__(fory, type_)

# xwrite simply delegates to ``write``: the same encoding is used for both entry points.
def xwrite(self, buffer, value):
self.write(buffer, value)

# xread simply delegates to ``read`` and returns whatever it returns.
def xread(self, buffer):
return self.read(buffer)


class BooleanSerializer(Serializer):
def write(self, buffer, value):
buffer.write_bool(value)
Expand Down Expand Up @@ -232,6 +243,21 @@ def read(self, buffer):
return buffer.read_double()


class BFloat16Serializer(XlangCompatibleSerializer):
    """Serializer that stores a bfloat16 scalar as its raw 16-bit pattern."""

    def write(self, buffer, value):
        """Write ``value`` to ``buffer`` as bfloat16 bits.

        ``value`` may already be a ``BFloat16``; anything else is first
        converted through the ``BFloat16`` constructor.
        """
        # Imported lazily, as in the rest of this class, so the module can be
        # loaded before the Cython extension is available.
        from pyfory.bfloat16 import BFloat16

        bf16 = value if isinstance(value, BFloat16) else BFloat16(value)
        buffer.write_bfloat16(bf16.to_bits())

    def read(self, buffer):
        """Read 16 bits from ``buffer`` and wrap them in a ``BFloat16``."""
        from pyfory.bfloat16 import BFloat16

        raw_bits = buffer.read_bfloat16()
        return BFloat16.from_bits(raw_bits)


class StringSerializer(Serializer):
def __init__(self, fory, type_):
super().__init__(fory, type_)
Expand Down
116 changes: 116 additions & 0 deletions python/pyfory/bfloat16.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from libc.stdint cimport uint16_t, uint32_t
from libc.string cimport memcpy

cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
    # Convert a float32 to its bfloat16 bit pattern (the upper 16 bits of the
    # float32 encoding) using round-to-nearest, ties-to-even.
    #
    # NaN inputs are handled separately: rounding a NaN like any other value
    # can carry into the exponent/sign or drop the whole payload, turning the
    # NaN into an infinity or a zero. Instead the sign and the upper payload
    # bits are kept and the quiet bit (0x0040) is forced on so the mantissa
    # can never become zero.
    cdef uint32_t f32_bits
    cdef uint32_t rounding_bias
    # memcpy rather than a pointer cast keeps the type punning well-defined.
    memcpy(&f32_bits, &value, 4)
    if (f32_bits & 0x7FFFFFFF) > 0x7F800000:
        # Exponent all ones with a non-zero mantissa: NaN.
        return <uint16_t>((f32_bits >> 16) | 0x0040)
    # Adding 0x7FFF plus the lowest kept bit rounds to nearest and resolves
    # exact ties toward the even result. A finite value that rounds past the
    # largest bfloat16 carries into the exponent and becomes +/-infinity,
    # which is the correct rounded result. No uint32 overflow is possible
    # here because NaNs were handled above.
    rounding_bias = 0x7FFF + ((f32_bits >> 16) & 1)
    return <uint16_t>((f32_bits + rounding_bias) >> 16)

cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
    # Widen a bfloat16 bit pattern to float32: the 16 bits become the upper
    # half of the float32 encoding and the lower half is zero.
    cdef float widened
    cdef uint32_t f32_bits = (<uint32_t>bits) << 16
    # memcpy rather than a pointer cast keeps the type punning well-defined.
    memcpy(&widened, &f32_bits, 4)
    return widened


cdef class BFloat16:
    """Scalar bfloat16 value held as its raw 16-bit pattern.

    Bit layout of the stored pattern: 1 sign bit (0x8000), 8 exponent bits
    and 7 mantissa bits (0x007F).
    """
    cdef uint16_t _bits

    def __init__(self, value):
        """Build from another ``BFloat16`` (bits are copied) or from anything
        accepted by ``float()``, which is rounded to bfloat16."""
        if isinstance(value, BFloat16):
            self._bits = (<BFloat16>value)._bits
        else:
            self._bits = float32_to_bfloat16_bits(<float>float(value))

    @staticmethod
    def from_bits(uint16_t bits):
        """Create a ``BFloat16`` directly from a raw 16-bit pattern."""
        # __new__ bypasses __init__ so the bits are stored without conversion.
        cdef BFloat16 bf16 = BFloat16.__new__(BFloat16)
        bf16._bits = bits
        return bf16

    def to_bits(self):
        """Return the raw 16-bit pattern as an integer."""
        return self._bits

    def to_float32(self):
        """Return the value widened to float32 (as a Python float)."""
        return bfloat16_bits_to_float32(self._bits)

    def __float__(self):
        return float(self.to_float32())

    def __repr__(self):
        return f"BFloat16({self.to_float32()})"

    def __str__(self):
        return str(self.to_float32())

    def __eq__(self, other):
        """Numeric equality: NaN never equals anything, +0 equals -0,
        otherwise the bit patterns must match. Non-``BFloat16`` operands
        compare unequal."""
        if isinstance(other, BFloat16):
            if self.is_nan() or (<BFloat16>other).is_nan():
                return False
            if self.is_zero() and (<BFloat16>other).is_zero():
                return True
            return self._bits == (<BFloat16>other)._bits
        return False

    def __hash__(self):
        # +0.0 and -0.0 compare equal in __eq__, so they must share a hash;
        # hashing the raw bits would give 0 and 0x8000 respectively.
        if (self._bits & 0x7FFF) == 0:
            return hash(0)
        return hash(self._bits)

    def is_nan(self):
        """True when the exponent is all ones and the mantissa is non-zero."""
        cdef uint16_t exp = (self._bits >> 7) & 0xFF
        cdef uint16_t mant = self._bits & 0x7F
        return exp == 0xFF and mant != 0

    def is_inf(self):
        """True when the exponent is all ones and the mantissa is zero."""
        cdef uint16_t exp = (self._bits >> 7) & 0xFF
        cdef uint16_t mant = self._bits & 0x7F
        return exp == 0xFF and mant == 0

    def is_zero(self):
        """True for both +0 and -0 (sign bit ignored)."""
        return (self._bits & 0x7FFF) == 0

    def is_finite(self):
        """True when the value is neither infinite nor NaN."""
        cdef uint16_t exp = (self._bits >> 7) & 0xFF
        return exp != 0xFF

    def is_normal(self):
        """True when the exponent is neither all zeros nor all ones."""
        cdef uint16_t exp = (self._bits >> 7) & 0xFF
        return exp != 0 and exp != 0xFF

    def is_subnormal(self):
        """True when the exponent is zero and the mantissa is non-zero."""
        cdef uint16_t exp = (self._bits >> 7) & 0xFF
        cdef uint16_t mant = self._bits & 0x7F
        return exp == 0 and mant != 0

    def signbit(self):
        """True when the sign bit is set (including -0 and negative NaN)."""
        return (self._bits & 0x8000) != 0
76 changes: 76 additions & 0 deletions python/pyfory/bfloat16_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import array

from pyfory.bfloat16 import BFloat16


class BFloat16Array:
    """Mutable sequence of bfloat16 values stored as packed 16-bit patterns.

    The patterns live in an ``array.array("H")``; elements are exposed as
    ``BFloat16`` objects. Values that are not already ``BFloat16`` are
    converted through the ``BFloat16`` constructor.
    """

    def __init__(self, values=None):
        """Create an empty array, or one holding ``values`` converted to bfloat16."""
        if values is None:
            self._data = array.array("H")
        else:
            self._data = array.array("H", (self._to_bits(v) for v in values))

    @staticmethod
    def _to_bits(value):
        """Return the 16-bit pattern for ``value`` (a ``BFloat16`` or float-like)."""
        if isinstance(value, BFloat16):
            return value.to_bits()
        return BFloat16(value).to_bits()

    def __len__(self):
        return len(self._data)

    def __getitem__(self, index):
        """Return one ``BFloat16`` for an integer index, or a new array for a slice."""
        if isinstance(index, slice):
            # Slicing the backing array yields another array("H"); wrap it
            # instead of handing it to BFloat16.from_bits, which expects an int.
            sliced = type(self)()
            sliced._data = self._data[index]
            return sliced
        return BFloat16.from_bits(self._data[index])

    def __setitem__(self, index, value):
        """Assign one element, or an iterable of elements when ``index`` is a slice."""
        if isinstance(index, slice):
            # array slice assignment only accepts another array of the same typecode.
            self._data[index] = array.array("H", (self._to_bits(v) for v in value))
        else:
            self._data[index] = self._to_bits(value)

    def __iter__(self):
        for bits in self._data:
            yield BFloat16.from_bits(bits)

    def __repr__(self):
        return f"BFloat16Array([{', '.join(str(bf16) for bf16 in self)}])"

    def __eq__(self, other):
        """Bit-pattern equality with another ``BFloat16Array``.

        The stored patterns are compared directly, so two NaNs with the same
        bits compare equal and +0 differs from -0, unlike ``BFloat16.__eq__``.
        """
        if not isinstance(other, BFloat16Array):
            return False
        return self._data == other._data

    def append(self, value):
        """Append one value, converting it to bfloat16 if needed."""
        self._data.append(self._to_bits(value))

    def extend(self, values):
        """Append every value from ``values`` in order."""
        for value in values:
            self.append(value)

    @property
    def itemsize(self):
        """Size of one element in bytes."""
        return 2

    def tobytes(self):
        """Return the packed patterns as bytes.

        NOTE(review): ``array.tobytes`` uses the machine's native byte order;
        confirm callers byteswap on big-endian hosts if little-endian output
        is required.
        """
        return self._data.tobytes()

    @classmethod
    def frombytes(cls, data):
        """Build an array from packed 16-bit patterns in ``data``.

        NOTE(review): ``array.frombytes`` interprets ``data`` in the machine's
        native byte order; confirm against the producer of ``data``.
        """
        arr = cls()
        arr._data.frombytes(data)
        return arr
4 changes: 4 additions & 0 deletions python/pyfory/buffer.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ cdef class Buffer:

cpdef inline write_float64(self, double value)

cpdef inline write_bfloat16(self, uint16_t value)

cpdef inline skip(self, int32_t length)

cpdef inline c_bool read_bool(self)
Expand Down Expand Up @@ -158,6 +160,8 @@ cdef class Buffer:

cpdef inline double read_float64(self)

cpdef inline uint16_t read_bfloat16(self)

cpdef inline write_varint64(self, int64_t v)

cpdef inline write_var_uint64(self, int64_t v)
Expand Down
8 changes: 8 additions & 0 deletions python/pyfory/buffer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,14 @@ cdef class Buffer:
# Write a C double by delegating to the underlying C buffer's write_double.
cpdef inline write_float64(self, double value):
self.c_buffer.write_double(value)

# Write a bfloat16 given as its raw 16-bit pattern; stored through the
# underlying C buffer's write_uint16, so no float conversion happens here.
cpdef inline write_bfloat16(self, uint16_t value):
self.c_buffer.write_uint16(value)

cpdef inline uint16_t read_bfloat16(self):
    """Read a bfloat16 as its raw 16-bit pattern.

    The value is fetched with the C buffer's ``read_uint16``; any error it
    records is raised by ``_raise_if_error`` before the bits are returned.
    """
    cdef uint16_t raw_bits = self.c_buffer.read_uint16(self._error)
    self._raise_if_error()
    return raw_bits

cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length):
if length == 0: # access an emtpy buffer may raise out-of-bound exception.
return
Expand Down
Loading
Loading