Commit ae53219
qdata-cpp 1.1.0: compatibility limits, depth guard, erased writer, external API support, vendoring, and tests
1 parent 3c78b26 commit ae53219

26 files changed: 1637 additions & 692 deletions

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
```diff
@@ -0,0 +1,32 @@
+## 1.1.0 - 2026-04-08
+
+### Format compatibility and limits
+
+- enforced the R-compatible size limits on both read and write:
+  - vector and list lengths capped to the R-compatible `R_XLEN_T_MAX` range
+  - attribute counts capped to the R-compatible `R_LEN_T_MAX` / `INT_MAX` range
+  - string payload and attribute-name lengths capped to `INT_MAX`
+- added native recursion-depth protection with a configurable `max_depth`
+  parameter and a default of `512`
+- documented that native `qdata-cpp` preserves attributes structurally as
+  `name + object` pairs and does not try to emulate R's special attribute-setter
+  semantics
+
+### Serialization internals
+
+- replaced the old templated in-memory writer core with a shared erased writer
+  path
+- kept the public templated buffer-facing serialize surface on top of that
+  erased writer implementation
+- tightened the installed include tree so the standalone `include/` root is
+  self-contained for downstream consumers
+
+### Testing and vendoring
+
+- added native regression coverage for the compatibility limits and `max_depth`
+  behavior
+- updated the vendored `xxHash` copy from `0.8.2` to `0.8.3`
+
+## 1.0.0 - 2026-04-07 - commit `1d21f34dbcaa`
+
+- initial release
```

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
```diff
@@ -100,6 +100,12 @@ if(QDATA_BUILD_TESTS)
   add_test(NAME qdata_buffer_api COMMAND qdata_buffer_api)
   set_tests_properties(qdata_buffer_api PROPERTIES TIMEOUT 120)
 
+  add_executable(qdata_compat_limits tests/cpp/compat_limits.cpp)
+  target_compile_features(qdata_compat_limits PRIVATE cxx_std_17)
+  target_link_libraries(qdata_compat_limits PRIVATE qdata)
+  add_test(NAME qdata_compat_limits COMMAND qdata_compat_limits)
+  set_tests_properties(qdata_compat_limits PROPERTIES TIMEOUT 120)
+
   find_program(RSCRIPT_EXECUTABLE Rscript REQUIRED)
   execute_process(
     COMMAND ${RSCRIPT_EXECUTABLE} -e
```

docs/experimental/full_graph.md

Lines changed: 0 additions & 32 deletions
This file was deleted.

docs/perf_experiments.md

Lines changed: 0 additions & 105 deletions
This file was deleted.

docs/qdata_spec.md

Lines changed: 23 additions & 1 deletion
```diff
@@ -106,9 +106,31 @@ real_vector rvec_moved = get<real_vector>(std::move(obj)); // moves the real_vec
 - `string_ref` exposes `is_na()` and `view()`, and implicitly converts to `std::string_view`.
 - That implicit conversion is lossy for `NA` and yields an empty view.
 
+## Compatibility Limits
+
+qdata-cpp uses R-compatible size limits for serialized structure sizes:
+
+- vector and list lengths are limited to `R_XLEN_T_MAX` compatibility (`2^52`)
+- attribute counts are limited to `R_LEN_T_MAX` / `INT_MAX`
+- string payload lengths and attribute-name lengths are limited to `INT_MAX`
+
+These limits apply on both read and write. Native qdata-cpp therefore stays within the same intended R-compatible format subset instead of emitting or materializing larger structures that the R layer would later reject.
+
+## Nesting Limit
+
+qdata-cpp recursive read and write traversal uses a `max_depth` budget with a default of `512`.
+
+This applies to nested list structure and recursively nested attribute values. The library rejects deeper inputs or objects instead of relying on unbounded native call-stack recursion.
+
+## Attribute Semantics
+
+Native qdata-cpp preserves attributes structurally as `name + object` pairs on the native object model.
+
+It does not try to emulate R's special attribute setter semantics for attributes such as `dim`, `dimnames`, `class`, `tsp`, `row.names`, or `names`. Those semantics are interpreted in the R layer, not in the native qdata-cpp object model.
+
 ## Write-side traits
 
-`C++ -> qdata` is more permissive than the read interface. It serializes directly from the source object whenever possible, and recurses naturally through nested containers.
+`C++ -> qdata` is more permissive than the read interface. It serializes directly from the source object whenever possible, and recurses naturally through nested containers, within the compatibility and nesting limits described above.
 
 The write side is organized around four ideas:
```
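The `max_depth` budget documented above can be illustrated with a minimal sketch of a decrement-per-level guard. `Node` and `traverse` below are hypothetical stand-ins for illustration, not the actual qdata-cpp API.

```cpp
#include <cstddef>
#include <stdexcept>
#include <vector>

// Hypothetical nested structure standing in for the native object model.
struct Node {
    std::vector<Node> children;
};

// Each recursion level consumes one unit of the depth budget; a zero budget
// rejects the input instead of recursing further, so traversal depth stays
// bounded regardless of the input shape.
inline void traverse(const Node& n, std::size_t depth_left) {
    if (depth_left == 0) {
        throw std::runtime_error("max_depth exceeded");
    }
    for (const Node& c : n.children) {
        traverse(c, depth_left - 1);
    }
}
```

With the documented default budget of 512, a chain nested 513 levels deep would be rejected, while arbitrarily wide but shallow structures pass.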

include/io/block_module.h

Lines changed: 17 additions & 6 deletions
```diff
@@ -1,8 +1,8 @@
 #ifndef _QS2_BLOCK_MODULE_H
 #define _QS2_BLOCK_MODULE_H
 
-#include "io/io_common.h"
-#include "io/xxhash_module.h"
+#include "io_common.h"
+#include "xxhash_module.h"
 
 // direct_mem switch does nothing, but is kept for parity with MT code
 template <class stream_writer, class compressor, class hasher, class error_policy, bool direct_mem>
@@ -130,9 +130,13 @@ struct BlockCompressReader {
         if(!ok) {
             cleanup_and_throw("Unexpected end of file while reading next block size");
         }
+        const uint32_t zbytes = compressed_block_size(zsize);
+        if(!compressed_block_size_fits_buffer(zsize)) {
+            cleanup_and_throw("Compressed block size exceeds internal maximum");
+        }
         hp.update(zsize);
-        uint32_t bytes_read = myFile.read(zblock.get(), zsize & (~BLOCK_METADATA));
-        if(bytes_read != (zsize & (~BLOCK_METADATA))) {
+        uint32_t bytes_read = myFile.read(zblock.get(), zbytes);
+        if(bytes_read != zbytes) {
             cleanup_and_throw("Unexpected end of file while reading next block");
         }
         hp.update(zblock.get(), bytes_read);
@@ -145,9 +149,13 @@ struct BlockCompressReader {
         if(!ok) {
             cleanup_and_throw("Unexpected end of file while reading next block size");
         }
+        const uint32_t zbytes = compressed_block_size(zsize);
+        if(!compressed_block_size_fits_buffer(zsize)) {
+            cleanup_and_throw("Compressed block size exceeds internal maximum");
+        }
         hp.update(zsize);
-        uint32_t bytes_read = myFile.read(zblock.get(), zsize & (~BLOCK_METADATA));
-        if(bytes_read != (zsize & (~BLOCK_METADATA))) {
+        uint32_t bytes_read = myFile.read(zblock.get(), zbytes);
+        if(bytes_read != zbytes) {
             cleanup_and_throw("Unexpected end of file while reading next block");
         }
         hp.update(zblock.get(), bytes_read);
@@ -197,6 +205,9 @@ struct BlockCompressReader {
         std::memcpy(outbuffer, block.get()+data_offset, bytes_accounted);
         while(len - bytes_accounted >= MAX_BLOCKSIZE) {
             decompress_direct(outbuffer + bytes_accounted);
+            if(current_blocksize != MAX_BLOCKSIZE) {
+                cleanup_and_throw("Corrupted block data");
+            }
             bytes_accounted += MAX_BLOCKSIZE;
             data_offset = MAX_BLOCKSIZE;
         }
```
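The added `current_blocksize != MAX_BLOCKSIZE` check enforces an invariant of the block format: while a full block's worth of output remains, every decompressed block must be exactly `MAX_BLOCKSIZE` bytes. A simplified sketch of that loop invariant, with hypothetical names (`account_full_blocks`, `kMaxBlocksize`) rather than the actual reader code:

```cpp
#include <cstdint>
#include <stdexcept>

constexpr uint32_t kMaxBlocksize = 1048576u; // 2^20, matching MAX_BLOCKSIZE

// While a full block's worth of output remains, each decompressed block must
// be exactly full; a short block mid-stream means the data is corrupted.
// (Simplified: the real reader refreshes current_blocksize per iteration.)
inline uint64_t account_full_blocks(uint64_t len, uint64_t bytes_accounted,
                                    uint32_t current_blocksize) {
    while (len - bytes_accounted >= kMaxBlocksize) {
        if (current_blocksize != kMaxBlocksize) {
            throw std::runtime_error("Corrupted block data");
        }
        bytes_accounted += kMaxBlocksize;
    }
    return bytes_accounted;
}
```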

include/io/filestream_module.h

Lines changed: 7 additions & 3 deletions
```diff
@@ -2,7 +2,7 @@
 #ifndef _QS2_FILESTREAM_MODULE_H
 #define _QS2_FILESTREAM_MODULE_H
 
-#include "io/io_common.h"
+#include "io_common.h"
 
 // in binary mode, seek/tell should be byte offsets from beginning of the file
 // libstdc++ uses file descriptors under the hood for std::fstream:
@@ -35,7 +35,11 @@ struct IfStreamReader {
 
 struct OfStreamWriter {
     std::ofstream con;
-    OfStreamWriter(const char * const path) : con(path, std::ios::out | std::ios::binary) {}
+    OfStreamWriter(const char * const path) : con(path, std::ios::out | std::ios::binary) {
+        if(con.is_open()) {
+            con.exceptions(std::ios::failbit | std::ios::badbit);
+        }
+    }
     bool isValid() { return con.is_open(); }
     uint32_t write(const char * const ptr, const uint32_t count) {
         con.write(ptr, count);
@@ -49,4 +53,4 @@ struct OfStreamWriter {
     uint64_t tellp() { return con.tellp(); }
 };
 
-#endif
+#endif
```
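The constructor change above arms stream exceptions, so a failed write raises `std::ios_base::failure` instead of silently setting `failbit`. A minimal standalone sketch of the same pattern, independent of qdata-cpp (`open_checked` is a hypothetical helper name):

```cpp
#include <fstream>
#include <ios>

// Open a file for binary output and arm exception reporting on failure:
// after this, any failed write() throws std::ios_base::failure rather than
// silently setting failbit on the stream.
inline std::ofstream open_checked(const char* path) {
    std::ofstream con(path, std::ios::out | std::ios::binary);
    if (con.is_open()) {
        con.exceptions(std::ios::failbit | std::ios::badbit);
    }
    return con;
}
```

Exceptions are armed only when the open succeeded, so callers can still probe `is_open()` without triggering a throw, mirroring the `isValid()` check kept on `OfStreamWriter`.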

include/io/io_common.h

Lines changed: 11 additions & 10 deletions
```diff
@@ -11,31 +11,32 @@
 
 #include "zstd.h"
 #define XXH_INLINE_ALL
-#include "xxhash/xxhash.h"
+#include "../xxhash/xxhash.h"
 #undef XXH_INLINE_ALL
 
-#include "blosc/shuffle_routines.h"
-#include "blosc/unshuffle_routines.h"
+#include "../blosc/shuffle_routines.h"
+#include "../blosc/unshuffle_routines.h"
 
-#ifdef QS2_DYNAMIC_BLOCKSIZE
-static uint64_t MAX_BLOCKSIZE = 1048576ULL;
-static constexpr uint64_t BLOCK_RESERVE = 64ULL;
-static uint64_t MIN_BLOCKSIZE = MAX_BLOCKSIZE - BLOCK_RESERVE; // smallest allowable block size, except for last block
-static uint64_t MAX_ZBLOCKSIZE = ZSTD_compressBound(MAX_BLOCKSIZE);
-#else
 static constexpr uint32_t MAX_BLOCKSIZE = 1048576UL;
 static constexpr uint32_t BLOCK_RESERVE = 64UL;
 static constexpr uint32_t MIN_BLOCKSIZE = MAX_BLOCKSIZE - BLOCK_RESERVE; // smallest allowable block size, except for last block
 static const uint32_t MAX_ZBLOCKSIZE = ZSTD_compressBound(MAX_BLOCKSIZE);
 // 2^20 ... we save blocksize as uint32_t, so the last 12 MSBs can be used to store metadata
 // This blocksize is 2x larger than `qs` and seems to be a better tradeoff overall in benchmarks
-#endif
 
 // 11111111 11110000 00000000 00000000 in binary, First 12 MSBs can be used for metadata in either zblock or block
 // currently only using the first bit for metadata
 static constexpr uint32_t BLOCK_METADATA = 0x80000000; // 10000000 00000000 00000000 00000000
 static constexpr uint32_t SHUFFLE_MASK = (1ULL << 31);
 
+inline constexpr uint32_t compressed_block_size(const uint32_t zsize) noexcept {
+    return zsize & (~BLOCK_METADATA);
+}
+
+inline constexpr bool compressed_block_size_fits_buffer(const uint32_t zsize) noexcept {
+    return static_cast<uint64_t>(compressed_block_size(zsize)) <= MAX_ZBLOCKSIZE;
+}
+
 // MAKE_UNIQUE_BLOCK and MAKE_SHARED_BLOCK macros should be used ONLY in initializer lists
 #if __cplusplus >= 201402L // Check for C++14 or above
 #define MAKE_UNIQUE_BLOCK(SIZE) std::make_unique<char[]>(SIZE)
```
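The new helpers centralize the `zsize & ~BLOCK_METADATA` masking the readers previously repeated inline: the top bit of the stored 32-bit size carries metadata, and the remaining bits are the compressed byte count, which must fit the preallocated buffer. A standalone sketch of the same masking logic, with the metadata constant copied from the header above and an illustrative buffer bound (`fits_buffer` is a hypothetical name; the real code bounds by `ZSTD_compressBound(MAX_BLOCKSIZE)`):

```cpp
#include <cstdint>

// Top bit of the stored 32-bit block size carries metadata (the shuffle flag);
// the remaining 31 bits are the actual compressed byte count.
static constexpr uint32_t BLOCK_METADATA = 0x80000000u;

inline constexpr uint32_t compressed_block_size(uint32_t zsize) noexcept {
    return zsize & ~BLOCK_METADATA;
}

// A size read from the stream must fit the preallocated decompression buffer,
// otherwise the reader rejects the block before attempting to read it.
inline constexpr bool fits_buffer(uint32_t zsize, uint64_t max_zblocksize) noexcept {
    return static_cast<uint64_t>(compressed_block_size(zsize)) <= max_zblocksize;
}
```

Checking the bound before the read is what turns a corrupted or hostile size field into a clean error instead of an oversized read into a fixed buffer.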

include/io/multithreaded_block_module.h

Lines changed: 14 additions & 6 deletions
```diff
@@ -1,9 +1,9 @@
 #ifndef _QIO_MULTITHREADED_BLOCK_MODULE_H
 #define _QIO_MULTITHREADED_BLOCK_MODULE_H
 
-#include "io/io_common.h"
-#include "io/tbb_flow_compat.h"
-#include "io/xxhash_module.h"
+#include "io_common.h"
+#include "tbb_flow_compat.h"
+#include "xxhash_module.h"
 
 #include <atomic>
 #include <string>
@@ -291,11 +291,16 @@ struct BlockCompressReaderMT {
             end_of_file.store(true);
             return false;
         }
+        const uint32_t zbytes = compressed_block_size(zsize);
+        if(!compressed_block_size_fits_buffer(zsize)) {
+            tgc.cancel_group_execution();
+            return false;
+        }
         if(!available_zblocks.try_pop(zblock.block)) {
             zblock.block = MAKE_SHARED_BLOCK_ASSIGNMENT(MAX_ZBLOCKSIZE);
         }
-        uint32_t bytes_read = this->myFile.read(zblock.block.get(), zsize & (~BLOCK_METADATA));
-        if(bytes_read != (zsize & (~BLOCK_METADATA))) {
+        uint32_t bytes_read = this->myFile.read(zblock.block.get(), zbytes);
+        if(bytes_read != zbytes) {
             end_of_file.store(true);
             return false;
         }
@@ -374,7 +379,10 @@ struct BlockCompressReaderMT {
         std::memcpy(outbuffer, current_block.get()+data_offset, bytes_accounted);
         while(len - bytes_accounted >= MAX_BLOCKSIZE) {
             get_new_block();
-            std::memcpy(outbuffer + bytes_accounted, current_block.get(), current_blocksize);
+            if(current_blocksize != MAX_BLOCKSIZE) {
+                cleanup_and_throw("Corrupted block data");
+            }
+            std::memcpy(outbuffer + bytes_accounted, current_block.get(), MAX_BLOCKSIZE);
             bytes_accounted += MAX_BLOCKSIZE;
             data_offset = MAX_BLOCKSIZE;
         }
```

include/io/xxhash_module.h

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,7 +1,7 @@
 #ifndef _QS2_XXHASH_MODULE_H
 #define _QS2_XXHASH_MODULE_H
 
-#include "io/io_common.h"
+#include "io_common.h"
 
 struct xxHashEnv {
     XXH3_state_t* state;
```
