From e532b7fefa8ed732c2e351b10d729fcf37c1f35f Mon Sep 17 00:00:00 2001 From: lani_karrot Date: Sat, 25 Apr 2026 17:12:02 +0900 Subject: [PATCH 1/4] fix: respect negative values on deserialization in REQ sketch --- .../org/apache/datasketches/req/ReqSerDe.java | 2 +- .../datasketches/req/ReqCompactorTest.java | 43 +++++++++++++++++++ .../req/ReqSketchCrossLanguageTest.java | 24 +++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 8f343e20a..dac8d23d1 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -205,7 +205,7 @@ static final Compactor extractCompactor(final PositionalSegment posSeg, final bo final float[] arr = new float[count]; posSeg.getFloatArray(arr, 0, count); float minItem = Float.MAX_VALUE; - float maxItem = Float.MIN_VALUE; + float maxItem = -Float.MAX_VALUE; for (int i = 0; i < count; i++) { minItem = min(minItem, arr[i]); maxItem = max(maxItem, arr[i]); diff --git a/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java b/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java index 5839f0a58..53430d18c 100644 --- a/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java @@ -101,4 +101,47 @@ private static void checkSerDeImpl(final int k, final boolean hra) { assertEquals(fbuf2.getDelta(), delta); assertTrue(fbuf.isEqualTo(fbuf2)); } + + @Test + public void checkSerDeWithNegativeValues() { + checkSerDeNegativeImpl(12, false); + checkSerDeNegativeImpl(12, true); + } + + private static void checkSerDeNegativeImpl(final int k, final boolean hra) { + final ReqCompactor c1 = new ReqCompactor((byte)0, hra, k, null); + final int nomCap = 2 * 3 * k; + final FloatBuffer fbuf = c1.getBuffer(); + + for (int i = 1; i <= nomCap; i++) { + fbuf.append(-i); //all negative values + } + final byte[] c1ser = c1.toByteArray(); + final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(c1ser)); + final Compactor compactor = ReqSerDe.extractCompactor(posSeg, fbuf.isSorted(), hra); + assertEquals(compactor.minItem, -nomCap); + assertEquals(compactor.maxItem, -1f); + } + + @Test + public void checkSerDeWithMixedValues() { + checkSerDeMixedImpl(12, false); + checkSerDeMixedImpl(12, true); + } + + private static void checkSerDeMixedImpl(final int k, final boolean hra) { + final ReqCompactor c1 = new ReqCompactor((byte)0, hra, k, null); + final int nomCap = 2 * 3 * k; + final int half = nomCap / 2; + final FloatBuffer fbuf = c1.getBuffer(); + + for (int i = 0; i < nomCap; i++) { + fbuf.append(i - half); // range: -half to half-1 + } + final byte[] c1ser = c1.toByteArray(); + final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(c1ser)); + final Compactor compactor = ReqSerDe.extractCompactor(posSeg, fbuf.isSorted(), hra); + assertEquals(compactor.minItem, (float) -half); + assertEquals(compactor.maxItem, (float) (half - 1)); + } } diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java index 0e7fbee59..57f8a827d 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java @@ -51,6 +51,30 @@ public void generateBinariesForCompatibilityTesting() throws IOException { } } + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateNegativeBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {1, 10}; + for (final int n: nArr) { + final ReqSketch sk = ReqSketch.builder().build(); + for (int i = 1; i <= n; i++) { + sk.update(-i); + } + putBytesToJavaPath("req_float_negative_n" + n + "_java.sk", sk.toByteArray()); + } + } + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateMixedBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {1, 10}; + for (final int n: nArr) { + final ReqSketch sk = ReqSketch.builder().build(); + for (int i = -n; i <= n; i++) { + sk.update(i); + } + putBytesToJavaPath("req_float_mixed_n" + n + "_java.sk", sk.toByteArray()); + } + } + @Test(groups = {CHECK_CPP_FILES}) public void deserializeFromCpp() throws IOException { final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; From 6a514db312af307f0f6fb32262ec7e4b868d5ca3 Mon Sep 17 00:00:00 2001 From: lani_karrot Date: Sat, 25 Apr 2026 17:35:55 +0900 Subject: [PATCH 2/4] test: add cases from C++ deserialize --- .../req/ReqSketchCrossLanguageTest.java | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java index 57f8a827d..d197d7ae0 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java @@ -99,4 +99,47 @@ public void deserializeFromCpp() throws IOException { } } + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeNegativeFromCpp() throws IOException { + final int[] nArr = {1, 10}; + for (final int n: nArr) { + final byte[] bytes = getFileBytes(cppPath, "req_float_negative_n" + n + "_cpp.sk"); + final ReqSketch sk = ReqSketch.heapify(MemorySegment.ofArray(bytes)); + assertTrue(!sk.isEmpty()); + assertEquals(sk.getN(), n); + assertEquals(sk.getMinItem(), -n); + assertEquals(sk.getMaxItem(), -1); + final QuantilesFloatsSketchIterator it = sk.iterator(); + long weight = 0; + while(it.next()) { + assertTrue(it.getQuantile() >= sk.getMinItem()); + assertTrue(it.getQuantile() <= sk.getMaxItem()); + weight += it.getWeight(); + } + assertEquals(weight, n); + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeMixedFromCpp() throws IOException { + final int[] nArr = {1, 10}; + for (final int n: nArr) { + final byte[] bytes = getFileBytes(cppPath, "req_float_mixed_n" + n + "_cpp.sk"); + final ReqSketch sk = ReqSketch.heapify(MemorySegment.ofArray(bytes)); + assertTrue(!sk.isEmpty()); + final int expectedN = 2 * n + 1; // range -n to n inclusive + assertEquals(sk.getN(), expectedN); + assertEquals(sk.getMinItem(), -n); + assertEquals(sk.getMaxItem(), (float) n); + final QuantilesFloatsSketchIterator it = sk.iterator(); + long weight = 0; + while(it.next()) { + assertTrue(it.getQuantile() >= sk.getMinItem()); + assertTrue(it.getQuantile() <= sk.getMaxItem()); + weight += it.getWeight(); + } + assertEquals(weight, expectedN); + } + } + } From 8535219194ba685f4abfe2ba9eab43bd4221a508 Mon Sep 17 00:00:00 2001 From: lani_karrot Date: Sat, 25 Apr 2026 20:21:57 +0900 Subject: [PATCH 3/4] test: remove c++ compat test cases --- .../req/ReqSketchCrossLanguageTest.java | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java index d197d7ae0..8e38b9b16 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchCrossLanguageTest.java @@ -98,48 +98,4 @@ public void deserializeFromCpp() throws IOException { } } } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeNegativeFromCpp() throws IOException { - final int[] nArr = {1, 10}; - for (final int n: nArr) { - final byte[] bytes = getFileBytes(cppPath, "req_float_negative_n" + n + "_cpp.sk"); - final ReqSketch sk = ReqSketch.heapify(MemorySegment.ofArray(bytes)); - assertTrue(!sk.isEmpty()); - assertEquals(sk.getN(), n); - assertEquals(sk.getMinItem(), -n); - assertEquals(sk.getMaxItem(), -1); - final QuantilesFloatsSketchIterator it = sk.iterator(); - long weight = 0; - while(it.next()) { - assertTrue(it.getQuantile() >= sk.getMinItem()); - assertTrue(it.getQuantile() <= sk.getMaxItem()); - weight += it.getWeight(); - } - assertEquals(weight, n); - } - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeMixedFromCpp() throws IOException { - final int[] nArr = {1, 10}; - for (final int n: nArr) { - final byte[] bytes = getFileBytes(cppPath, "req_float_mixed_n" + n + "_cpp.sk"); - final ReqSketch sk = ReqSketch.heapify(MemorySegment.ofArray(bytes)); - assertTrue(!sk.isEmpty()); - final int expectedN = 2 * n + 1; // range -n to n inclusive - assertEquals(sk.getN(), expectedN); - assertEquals(sk.getMinItem(), -n); - assertEquals(sk.getMaxItem(), (float) n); - final QuantilesFloatsSketchIterator it = sk.iterator(); - long weight = 0; - while(it.next()) { - assertTrue(it.getQuantile() >= sk.getMinItem()); - assertTrue(it.getQuantile() <= sk.getMaxItem()); - weight += it.getWeight(); - } - assertEquals(weight, expectedN); - } - } - } From d67731a309a28e2c7bad4571b9c318c5599476b0 Mon Sep 17 00:00:00 2001 From: proost Date: Thu, 14 May 2026 00:28:52 +0900 Subject: [PATCH 4/4] fix: init with NaN --- .../org/apache/datasketches/req/ReqSerDe.java | 15 ++++++++----- .../datasketches/req/ReqCompactorTest.java | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index dac8d23d1..a5aba8343 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -20,7 +20,6 @@ package org.apache.datasketches.req; import static java.lang.Math.max; -import static java.lang.Math.min; import static java.lang.Math.round; import java.lang.foreign.MemorySegment; @@ -204,11 +203,17 @@ static final Compactor extractCompactor(final PositionalSegment posSeg, final bo final int count = posSeg.getInt(); final float[] arr = new float[count]; posSeg.getFloatArray(arr, 0, count); - float minItem = Float.MAX_VALUE; - float maxItem = -Float.MAX_VALUE; + float minItem = Float.NaN; + float maxItem = Float.NaN; for (int i = 0; i < count; i++) { - minItem = min(minItem, arr[i]); - maxItem = max(maxItem, arr[i]); + final float item = arr[i]; + if (Float.isNaN(minItem)) { + minItem = item; + maxItem = item; + } else { + if (item < minItem) { minItem = item; } + if (item > maxItem) { maxItem = item; } + } } final int delta = 2 * sectionSize * numSections; final int nomCap = 2 * delta; diff --git a/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java b/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java index 53430d18c..6c0ec20da 100644 --- a/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqCompactorTest.java @@ -144,4 +144,26 @@ private static void checkSerDeMixedImpl(final int k, final boolean hra) { assertEquals(compactor.minItem, (float) -half); assertEquals(compactor.maxItem, (float) (half - 1)); } + + @Test + public void checkSerDeWithInfiniteValues() { + checkSerDeInfiniteImpl(12, false); + checkSerDeInfiniteImpl(12, true); + } + + private static void checkSerDeInfiniteImpl(final int k, final boolean hra) { + final ReqCompactor c1 = new ReqCompactor((byte)0, hra, k, null); + final FloatBuffer fbuf = c1.getBuffer(); + + fbuf.append(Float.POSITIVE_INFINITY); + fbuf.append(1); + fbuf.append(Float.NEGATIVE_INFINITY); + fbuf.append(-1); + + final byte[] c1ser = c1.toByteArray(); + final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(c1ser)); + final Compactor compactor = ReqSerDe.extractCompactor(posSeg, fbuf.isSorted(), hra); + assertEquals(compactor.minItem, Float.NEGATIVE_INFINITY); + assertEquals(compactor.maxItem, Float.POSITIVE_INFINITY); + } }