apache · asadjan4611 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
diff --git a/BUILD b/BUILD
@@ -70,6 +70,17 @@ pyx_library(
     ],
 )
 
+# Cython extension target for the bfloat16 scalar type, built from
+# python/pyfory/bfloat16.pyx. The package __init__.py is listed in srcs
+# alongside the .pyx source.
+pyx_library(
+    name = "bfloat16",
+    srcs = glob([
+        "python/pyfory/bfloat16.pyx",
+        "python/pyfory/__init__.py",
+    ]),
+    # NOTE(review): linkstatic = 1 is forwarded via cc_kwargs; presumably this
+    # matches the other pyx_library targets in this file -- confirm.
+    cc_kwargs = dict(
+        linkstatic = 1,
+    ),
+)
+
 pyx_library(
     name = "_format",
     srcs = glob(
@@ -96,6 +107,7 @@ genrule(
     name = "cp_fory_so",
     srcs = [
         ":python/pyfory/buffer.so",
+        ":python/pyfory/bfloat16.so",
         ":python/pyfory/lib/mmh3/mmh3.so",
         ":python/pyfory/format/_format.so",
         ":python/pyfory/serialization.so",
@@ -111,11 +123,13 @@ genrule(
         if [ "$${u_name: 0: 4}" == "MING" ] || [ "$${u_name: 0: 4}" == "MSYS" ]
         then
             cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory/buffer.pyd"
+            cp -f $(location python/pyfory/bfloat16.so) "$$WORK_DIR/python/pyfory/bfloat16.pyd"
             cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd"
             cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd"
             cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd"
         else
             cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory"
+            cp -f $(location python/pyfory/bfloat16.so) "$$WORK_DIR/python/pyfory"
             cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3"
             cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format"
             cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory"

@@ -490,6 +490,34 @@ fory.register(Person.class, "example.Person");
 Person person = (Person) fory.deserialize(binaryData);
 ```
 
+### BFloat16 Support
+
+`pyfory` supports `bfloat16` scalar values and `bfloat16` arrays in xlang mode:
+
+- Scalar type: `pyfory.BFloat16` (type id `18`)
+- Array type: `pyfory.BFloat16Array` (type id `54`)
+
+```python
+import pyfory
+from pyfory import BFloat16, BFloat16Array
+
+fory = pyfory.Fory(xlang=True, ref=False, strict=True)
+
+# Scalar bfloat16
+v = BFloat16(3.1415926)
+data = fory.serialize(v)
+out = fory.deserialize(data)
+print(float(out))
+
+# bfloat16 array
+arr = BFloat16Array([1.0, 2.5, -3.25])
+data = fory.serialize(arr)
+out = fory.deserialize(data)
+print(out)
+```
+
+`BFloat16Array` stores values in a packed `array('H')` representation and writes bytes in little-endian order for cross-language compatibility.
+
 ## 📊 Row Format - Zero-Copy Processing
 
 Apache Fory™ provides a random-access row format that enables reading nested fields from binary data without full deserialization. This drastically reduces overhead when working with large objects where only partial data access is needed. The format also supports memory-mapped files for ultra-low memory footprint.

@@ -50,6 +50,7 @@
     TaggedUint64Serializer,
     Float32Serializer,
     Float64Serializer,
+    BFloat16Serializer,
     StringSerializer,
     DateSerializer,
     TimestampSerializer,
@@ -88,6 +89,8 @@
     tagged_uint64,
     float32,
     float64,
+    bfloat16,
+    bfloat16_array,
     int8_array,
     uint8_array,
     int16_array,
@@ -118,6 +121,14 @@
 from pyfory.policy import DeserializationPolicy  # noqa: F401 # pylint: disable=unused-import
 from pyfory.buffer import Buffer  # noqa: F401 # pylint: disable=unused-import
 
+# BFloat16 support
+try:
+    from pyfory.bfloat16 import BFloat16  # noqa: F401
+    from pyfory.bfloat16_array import BFloat16Array  # noqa: F401
+except ImportError:
+    # Cython extensions not built yet
+    pass
+
 __version__ = "0.16.0.dev0"
 
 __all__ = [
@@ -151,6 +162,10 @@
     "tagged_uint64",
     "float32",
     "float64",
+    "BFloat16",
+    "BFloat16Array",
+    "bfloat16",
+    "bfloat16_array",
     "int8_array",
     "uint8_array",
     "int16_array",
@@ -192,6 +207,7 @@
     "TaggedUint64Serializer",
     "Float32Serializer",
     "Float64Serializer",
+    "BFloat16Serializer",
     "StringSerializer",
     "DateSerializer",
     "TimestampSerializer",

diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py
@@ -52,6 +52,17 @@ def support_subclass(cls) -> bool:
         return False
 
 
+class XlangCompatibleSerializer(Serializer):
+    """Serializer whose ``xwrite``/``xread`` delegate to ``write``/``read``.
+
+    Subclasses only implement ``write`` and ``read``; the same encoding is then
+    used for both pairs of entry points.
+    """
+
+    def __init__(self, fory, type_):
+        super().__init__(fory, type_)
+
+    def xwrite(self, buffer, value):
+        """Write ``value`` to ``buffer`` by forwarding to ``write``."""
+        self.write(buffer, value)
+
+    def xread(self, buffer):
+        """Read a value from ``buffer`` by forwarding to ``read``."""
+        return self.read(buffer)
+
+
 class BooleanSerializer(Serializer):
     def write(self, buffer, value):
         buffer.write_bool(value)
@@ -232,6 +243,21 @@ def read(self, buffer):
         return buffer.read_double()
 
 
+class BFloat16Serializer(XlangCompatibleSerializer):
+    def write(self, buffer, value):
+        from pyfory.bfloat16 import BFloat16
+
+        if isinstance(value, BFloat16):
+            buffer.write_bfloat16(value.to_bits())
+        else:
+            buffer.write_bfloat16(BFloat16(value).to_bits())
+
+    def read(self, buffer):
+        from pyfory.bfloat16 import BFloat16
+
+        return BFloat16.from_bits(buffer.read_bfloat16())
+
+
 class StringSerializer(Serializer):
     def __init__(self, fory, type_):
         super().__init__(fory, type_)

@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from libc.stdint cimport uint16_t, uint32_t
+from libc.string cimport memcpy
+
+cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
+    # Convert a float32 to its bfloat16 bit pattern (the upper 16 bits of the
+    # float32 encoding) using round-to-nearest, ties-to-even.
+    #
+    # NaN is handled before rounding. Truncating or rounding a NaN like any
+    # other value can clear its mantissa (yielding infinity) or carry out of
+    # the exponent/sign bits (yielding a zero), so a NaN input must be mapped
+    # to a NaN output explicitly.
+    cdef uint32_t f32_bits
+    memcpy(&f32_bits, &value, 4)
+    if (f32_bits & 0x7FFFFFFF) > 0x7F800000:
+        # Exponent all ones with a non-zero mantissa: NaN. Keep the sign and
+        # the top payload bits, and force the quiet bit (0x0040) so the 7-bit
+        # mantissa can never end up as zero.
+        return <uint16_t>((f32_bits >> 16) | 0x0040)
+    # Rounding bias: 0x7FFF plus the lowest kept bit. The addition carries
+    # into bit 16 exactly when the discarded half is above 0x8000, or equal to
+    # 0x8000 with an odd kept value (ties-to-even). The largest finite values
+    # carry into the exponent and become infinity; infinities are unchanged.
+    f32_bits += 0x7FFF + ((f32_bits >> 16) & 1)
+    return <uint16_t>(f32_bits >> 16)
+
+cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
+    # Widen a bfloat16 bit pattern to float32: the 16 bits become the upper
+    # half of the 32-bit encoding and the lower half is zero.
+    cdef float widened
+    cdef uint32_t raw = (<uint32_t>bits) << 16
+    memcpy(&widened, &raw, sizeof(raw))
+    return widened
+
+
+cdef class BFloat16:
+    """Scalar bfloat16 value stored as its raw 16-bit pattern.
+
+    Bit layout, high to low: 1 sign bit, 8 exponent bits, 7 mantissa bits,
+    i.e. the upper half of a float32 encoding.
+    """
+    cdef uint16_t _bits
+
+    def __init__(self, value):
+        """Create from another ``BFloat16`` (bits are copied) or from any
+        value accepted by ``float()`` (converted through float32)."""
+        if isinstance(value, BFloat16):
+            self._bits = (<BFloat16>value)._bits
+        else:
+            self._bits = float32_to_bfloat16_bits(<float>float(value))
+
+    @staticmethod
+    def from_bits(uint16_t bits):
+        """Create a ``BFloat16`` directly from a raw 16-bit pattern."""
+        # __new__ skips __init__, so no float conversion is performed.
+        cdef BFloat16 bf16 = BFloat16.__new__(BFloat16)
+        bf16._bits = bits
+        return bf16
+
+    def to_bits(self):
+        """Return the raw 16-bit pattern as an int."""
+        return self._bits
+
+    def to_float32(self):
+        """Return the value widened to float32 (as a Python float)."""
+        return bfloat16_bits_to_float32(self._bits)
+
+    def __float__(self):
+        return float(self.to_float32())
+
+    def __repr__(self):
+        return f"BFloat16({self.to_float32()})"
+
+    def __str__(self):
+        return str(self.to_float32())
+
+    def __eq__(self, other):
+        """Compare by value: NaN equals nothing, +0.0 equals -0.0, and any
+        non-``BFloat16`` operand compares unequal."""
+        if isinstance(other, BFloat16):
+            if self.is_nan() or (<BFloat16>other).is_nan():
+                return False
+            if self.is_zero() and (<BFloat16>other).is_zero():
+                return True
+            return self._bits == (<BFloat16>other)._bits
+        return False
+
+    def __hash__(self):
+        # Objects that compare equal must hash equally. __eq__ treats +0.0
+        # (0x0000) and -0.0 (0x8000) as equal, so both zeros share one hash
+        # instead of hashing their differing bit patterns.
+        if self.is_zero():
+            return hash(0)
+        return hash(self._bits)
+
+    def is_nan(self):
+        """Return True if the exponent is all ones and the mantissa is non-zero."""
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        cdef uint16_t mant = self._bits & 0x7F
+        return exp == 0xFF and mant != 0
+
+    def is_inf(self):
+        """Return True if the exponent is all ones and the mantissa is zero."""
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        cdef uint16_t mant = self._bits & 0x7F
+        return exp == 0xFF and mant == 0
+
+    def is_zero(self):
+        """Return True for +0.0 and -0.0 (every bit except the sign is zero)."""
+        return (self._bits & 0x7FFF) == 0
+
+    def is_finite(self):
+        """Return True if the value is neither infinite nor NaN."""
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        return exp != 0xFF
+
+    def is_normal(self):
+        """Return True if the exponent is neither all zeros nor all ones."""
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        return exp != 0 and exp != 0xFF
+
+    def is_subnormal(self):
+        """Return True if the exponent is zero and the mantissa is non-zero."""
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        cdef uint16_t mant = self._bits & 0x7F
+        return exp == 0 and mant != 0
+
+    def signbit(self):
+        """Return True if the sign bit is set (including -0.0)."""
+        return (self._bits & 0x8000) != 0
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import array
+
+from pyfory.bfloat16 import BFloat16
+
+
+class BFloat16Array:
+    def __init__(self, values=None):
+        if values is None:
+            self._data = array.array("H")
+        else:
+            self._data = array.array("H", [BFloat16(v).to_bits() if not isinstance(v, BFloat16) else v.to_bits() for v in values])
+
+    def __len__(self):
+        return len(self._data)
+
+    def __getitem__(self, index):
+        return BFloat16.from_bits(self._data[index])
+
+    def __setitem__(self, index, value):
+        if isinstance(value, BFloat16):
+            self._data[index] = value.to_bits()
+        else:
+            self._data[index] = BFloat16(value).to_bits()
+
+    def __iter__(self):
+        for bits in self._data:
+            yield BFloat16.from_bits(bits)
+
+    def __repr__(self):
+        return f"BFloat16Array([{', '.join(str(bf16) for bf16 in self)}])"
+
+    def __eq__(self, other):
+        if not isinstance(other, BFloat16Array):
+            return False
+        return self._data == other._data
+
+    def append(self, value):
+        if isinstance(value, BFloat16):
+            self._data.append(value.to_bits())
+        else:
+            self._data.append(BFloat16(value).to_bits())
+
+    def extend(self, values):
+        for value in values:
+            self.append(value)
+
+    @property
+    def itemsize(self):
+        return 2
+
+    def tobytes(self):
+        return self._data.tobytes()
+
+    @classmethod
+    def frombytes(cls, data):
+        arr = cls()
+        arr._data = array.array("H")
+        arr._data.frombytes(data)
+        return arr
diff --git a/python/pyfory/buffer.pxd b/python/pyfory/buffer.pxd
@@ -128,6 +128,8 @@ cdef class Buffer:
 
     cpdef inline write_float64(self, double value)
 
+    # Write a bfloat16 value supplied as its raw 16-bit pattern.
+    cpdef inline write_bfloat16(self, uint16_t value)
+
     cpdef inline skip(self, int32_t length)
 
     cpdef inline c_bool read_bool(self)
@@ -158,6 +160,8 @@ cdef class Buffer:
 
     cpdef inline double read_float64(self)
 
+    # Read a bfloat16 value and return its raw 16-bit pattern.
+    cpdef inline uint16_t read_bfloat16(self)
+
     cpdef inline write_varint64(self, int64_t v)
 
     cpdef inline write_var_uint64(self, int64_t v)

diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx
@@ -244,6 +244,14 @@ cdef class Buffer:
     cpdef inline write_float64(self, double value):
         self.c_buffer.write_double(value)
 
+    cpdef inline write_bfloat16(self, uint16_t value):
+        # The caller passes the raw bfloat16 bit pattern; it is written with
+        # the underlying buffer's uint16 writer, no float conversion here.
+        self.c_buffer.write_uint16(value)
+
+    cpdef inline uint16_t read_bfloat16(self):
+        # Read the raw bfloat16 bit pattern with the underlying buffer's
+        # uint16 reader; the result is returned as bits, not as a float.
+        cdef uint16_t value = self.c_buffer.read_uint16(self._error)
+        # NOTE(review): presumably raises if the read recorded a failure in
+        # self._error (e.g. out-of-bounds) -- confirm against _raise_if_error.
+        self._raise_if_error()
+        return value
+
     cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length):
         if length == 0:  # access an emtpy buffer may raise out-of-bound exception.
             return