Skip to content

Commit a653a86

Browse files
authored
[refactor](be) Replace std::unique_ptr with DorisUniqueBufferPtr in SingleValueDataString (#62374) (#62408)
Pick #62374 Problem Summary: SingleValueDataString uses std::unique_ptr<char[]> with new[] for large string storage, which bypasses the Doris memory tracking allocator. This replaces it with DorisUniqueBufferPtr<char> so that allocations go through Allocator and are properly tracked. None - Test: Unit Test (added SingleValueDataStringTest) - Behavior changed: No - Does this need documentation: No Issue Number: close #xxx Related PR: #xxx Problem Summary: None - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 --> - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into --> ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
1 parent f291feb commit a653a86

2 files changed

Lines changed: 229 additions & 4 deletions

File tree

be/src/exprs/aggregate/aggregate_function_min_max.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "core/column/column_array.h"
3838
#include "core/column/column_fixed_length_object.h"
3939
#include "core/column/column_string.h"
40+
#include "core/custom_allocator.h"
4041
#include "core/data_type/data_type.h"
4142
#include "core/data_type/data_type_fixed_length_object.h"
4243
#include "core/data_type/data_type_string.h"
@@ -355,7 +356,7 @@ struct SingleValueDataString {
355356
// However, considering compatibility with future upgrades, no changes will be made here.
356357
Int32 size = -1; /// -1 indicates that there is no value.
357358
Int32 capacity = 0; /// power of two or zero
358-
std::unique_ptr<char[]> large_data;
359+
DorisUniqueBufferPtr<char> large_data;
359360

360361
public:
361362
static constexpr Int32 AUTOMATIC_STORAGE_SIZE = 64;
@@ -388,7 +389,7 @@ struct SingleValueDataString {
388389
if (size != -1) {
389390
size = -1;
390391
capacity = 0;
391-
large_data = nullptr;
392+
large_data.reset();
392393
}
393394
}
394395

@@ -415,7 +416,7 @@ struct SingleValueDataString {
415416
} else {
416417
if (capacity < rhs_size) {
417418
capacity = (Int32)round_up_to_power_of_two_or_zero(rhs_size);
418-
large_data.reset(new char[capacity]);
419+
large_data = DorisUniqueBufferPtr<char>(capacity);
419420
}
420421

421422
size = rhs_size;
@@ -443,7 +444,7 @@ struct SingleValueDataString {
443444
if (capacity < value_size) {
444445
/// Don't free large_data here.
445446
capacity = (Int32)round_up_to_power_of_two_or_zero(value_size);
446-
large_data.reset(new char[capacity]);
447+
large_data = DorisUniqueBufferPtr<char>(capacity);
447448
}
448449

449450
size = value_size;
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "exprs/aggregate/aggregate_function_min_max.h"
19+
20+
#include <gtest/gtest.h>
21+
22+
#include <string>
23+
24+
#include "core/arena.h"
25+
#include "core/column/column_string.h"
26+
#include "core/string_buffer.hpp"
27+
28+
namespace doris {
29+
30+
class SingleValueDataStringTest : public testing::Test {
31+
protected:
32+
Arena arena;
33+
};
34+
35+
TEST_F(SingleValueDataStringTest, DefaultNotHas) {
36+
SingleValueDataString data;
37+
ASSERT_FALSE(data.has());
38+
}
39+
40+
TEST_F(SingleValueDataStringTest, ResetWhenNoValue) {
41+
SingleValueDataString data;
42+
// reset on empty should not crash
43+
data.reset();
44+
ASSERT_FALSE(data.has());
45+
}
46+
47+
TEST_F(SingleValueDataStringTest, SmallStringChangeImpl) {
48+
SingleValueDataString data;
49+
std::string small = "hello";
50+
data.change_impl(StringRef(small.data(), small.size()), arena);
51+
ASSERT_TRUE(data.has());
52+
auto ref = data.get_string_ref();
53+
ASSERT_EQ(ref.size, small.size());
54+
ASSERT_EQ(std::string(ref.data, ref.size), small);
55+
}
56+
57+
TEST_F(SingleValueDataStringTest, LargeStringChangeImpl) {
58+
SingleValueDataString data;
59+
// Create a string larger than MAX_SMALL_STRING_SIZE
60+
std::string large(SingleValueDataString::MAX_SMALL_STRING_SIZE + 10, 'x');
61+
data.change_impl(StringRef(large.data(), large.size()), arena);
62+
ASSERT_TRUE(data.has());
63+
auto ref = data.get_string_ref();
64+
ASSERT_EQ(ref.size, large.size());
65+
ASSERT_EQ(std::string(ref.data, ref.size), large);
66+
}
67+
68+
TEST_F(SingleValueDataStringTest, ResetAfterChange) {
69+
SingleValueDataString data;
70+
std::string s = "test";
71+
data.change_impl(StringRef(s.data(), s.size()), arena);
72+
ASSERT_TRUE(data.has());
73+
data.reset();
74+
ASSERT_FALSE(data.has());
75+
}
76+
77+
TEST_F(SingleValueDataStringTest, ChangeIfLess) {
78+
SingleValueDataString data;
79+
std::string a = "banana";
80+
std::string b = "apple";
81+
82+
data.change_impl(StringRef(a.data(), a.size()), arena);
83+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), a);
84+
85+
SingleValueDataString other;
86+
other.change_impl(StringRef(b.data(), b.size()), arena);
87+
88+
ASSERT_TRUE(data.change_if_less(other, arena));
89+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), b);
90+
91+
// "apple" is not less than "apple"
92+
ASSERT_FALSE(data.change_if_less(other, arena));
93+
}
94+
95+
TEST_F(SingleValueDataStringTest, ChangeIfGreater) {
96+
SingleValueDataString data;
97+
std::string a = "apple";
98+
std::string b = "banana";
99+
100+
data.change_impl(StringRef(a.data(), a.size()), arena);
101+
102+
SingleValueDataString other;
103+
other.change_impl(StringRef(b.data(), b.size()), arena);
104+
105+
ASSERT_TRUE(data.change_if_greater(other, arena));
106+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), b);
107+
108+
ASSERT_FALSE(data.change_if_greater(other, arena));
109+
}
110+
111+
TEST_F(SingleValueDataStringTest, ChangeFirstTime) {
112+
SingleValueDataString data;
113+
SingleValueDataString src;
114+
std::string s = "first";
115+
src.change_impl(StringRef(s.data(), s.size()), arena);
116+
117+
data.change_first_time(src, arena);
118+
ASSERT_TRUE(data.has());
119+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), s);
120+
121+
// Second call should not change
122+
SingleValueDataString other;
123+
std::string s2 = "second";
124+
other.change_impl(StringRef(s2.data(), s2.size()), arena);
125+
data.change_first_time(other, arena);
126+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), s);
127+
}
128+
129+
TEST_F(SingleValueDataStringTest, WriteReadSmallString) {
130+
SingleValueDataString data;
131+
std::string s = "serialize_me";
132+
data.change_impl(StringRef(s.data(), s.size()), arena);
133+
134+
// Write
135+
auto col_write = ColumnString::create();
136+
BufferWritable writer(*col_write);
137+
data.write(writer);
138+
writer.commit();
139+
140+
// Read
141+
auto ref = col_write->get_data_at(0);
142+
BufferReadable reader(ref);
143+
SingleValueDataString data2;
144+
data2.read(reader, arena);
145+
146+
ASSERT_TRUE(data2.has());
147+
ASSERT_EQ(std::string(data2.get_string_ref().data, data2.get_string_ref().size), s);
148+
}
149+
150+
TEST_F(SingleValueDataStringTest, WriteReadLargeString) {
151+
SingleValueDataString data;
152+
std::string s(SingleValueDataString::MAX_SMALL_STRING_SIZE + 20, 'L');
153+
data.change_impl(StringRef(s.data(), s.size()), arena);
154+
155+
auto col_write = ColumnString::create();
156+
BufferWritable writer(*col_write);
157+
data.write(writer);
158+
writer.commit();
159+
160+
auto ref = col_write->get_data_at(0);
161+
BufferReadable reader(ref);
162+
SingleValueDataString data2;
163+
data2.read(reader, arena);
164+
165+
ASSERT_TRUE(data2.has());
166+
ASSERT_EQ(std::string(data2.get_string_ref().data, data2.get_string_ref().size), s);
167+
}
168+
169+
TEST_F(SingleValueDataStringTest, WriteReadNoValue) {
170+
SingleValueDataString data;
171+
172+
auto col_write = ColumnString::create();
173+
BufferWritable writer(*col_write);
174+
data.write(writer);
175+
writer.commit();
176+
177+
auto ref = col_write->get_data_at(0);
178+
BufferReadable reader(ref);
179+
SingleValueDataString data2;
180+
data2.read(reader, arena);
181+
182+
ASSERT_FALSE(data2.has());
183+
}
184+
185+
TEST_F(SingleValueDataStringTest, InsertResultIntoWithValue) {
186+
SingleValueDataString data;
187+
std::string s = "result";
188+
data.change_impl(StringRef(s.data(), s.size()), arena);
189+
190+
auto col = ColumnString::create();
191+
data.insert_result_into(*col);
192+
ASSERT_EQ(col->size(), 1);
193+
auto ref = col->get_data_at(0);
194+
ASSERT_EQ(std::string(ref.data, ref.size), s);
195+
}
196+
197+
TEST_F(SingleValueDataStringTest, InsertResultIntoWithoutValue) {
198+
SingleValueDataString data;
199+
auto col = ColumnString::create();
200+
data.insert_result_into(*col);
201+
ASSERT_EQ(col->size(), 1);
202+
// Default is empty string
203+
auto ref = col->get_data_at(0);
204+
ASSERT_EQ(ref.size, 0);
205+
}
206+
207+
TEST_F(SingleValueDataStringTest, LargeStringRealloc) {
208+
SingleValueDataString data;
209+
// First large allocation
210+
std::string s1(SingleValueDataString::MAX_SMALL_STRING_SIZE + 10, 'A');
211+
data.change_impl(StringRef(s1.data(), s1.size()), arena);
212+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), s1);
213+
214+
// Second larger allocation triggers realloc
215+
std::string s2(SingleValueDataString::MAX_SMALL_STRING_SIZE + 200, 'B');
216+
data.change_impl(StringRef(s2.data(), s2.size()), arena);
217+
ASSERT_EQ(std::string(data.get_string_ref().data, data.get_string_ref().size), s2);
218+
}
219+
220+
TEST_F(SingleValueDataStringTest, SizeStaticAssert) {
221+
static_assert(sizeof(SingleValueDataString) == SingleValueDataString::AUTOMATIC_STORAGE_SIZE);
222+
}
223+
224+
} // namespace doris

0 commit comments

Comments
 (0)