Skip to content

Commit 262692d

Browse files
committed
chore: add histogram buckets option to DataBaseBuilder
1 parent 2eb03c6 commit 262692d

4 files changed

Lines changed: 93 additions & 11 deletions

File tree

src/binder/analyze.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ impl<T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'_, '_, T, A>
4545
Operator::Analyze(AnalyzeOperator {
4646
table_name,
4747
index_metas,
48+
histogram_buckets: None,
4849
}),
4950
Childrens::Only(Box::new(scan_op)),
5051
))

src/db.rs

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline;
3030
use crate::optimizer::rule::implementation::ImplementationRuleImpl;
3131
use crate::optimizer::rule::normalization::NormalizationRuleImpl;
3232
use crate::parser::parse_sql;
33+
use crate::planner::operator::Operator;
3334
use crate::planner::LogicalPlan;
3435
use crate::storage::memory::MemoryStorage;
3536
#[cfg(not(target_arch = "wasm32"))]
@@ -64,6 +65,7 @@ pub struct DataBaseBuilder {
6465
path: PathBuf,
6566
scala_functions: ScalaFunctions,
6667
table_functions: TableFunctions,
68+
histogram_buckets: Option<usize>,
6769
}
6870

6971
impl DataBaseBuilder {
@@ -72,6 +74,7 @@ impl DataBaseBuilder {
7274
path: path.into(),
7375
scala_functions: Default::default(),
7476
table_functions: Default::default(),
77+
histogram_buckets: None,
7578
};
7679
builder = builder.register_scala_function(CharLength::new("char_length".to_lowercase()));
7780
builder =
@@ -85,6 +88,11 @@ impl DataBaseBuilder {
8588
builder
8689
}
8790

91+
pub fn histogram_buckets(mut self, buckets: usize) -> Self {
92+
self.histogram_buckets = Some(buckets);
93+
self
94+
}
95+
8896
pub fn register_scala_function(mut self, function: Arc<dyn ScalarFunctionImpl>) -> Self {
8997
let summary = function.summary().clone();
9098

@@ -100,41 +108,72 @@ impl DataBaseBuilder {
100108
}
101109

102110
pub fn build_with_storage<T: Storage>(self, storage: T) -> Result<Database<T>, DatabaseError> {
103-
Self::_build::<T>(storage, self.scala_functions, self.table_functions)
111+
Self::_build::<T>(
112+
storage,
113+
self.scala_functions,
114+
self.table_functions,
115+
self.histogram_buckets,
116+
)
104117
}
105118

106119
#[cfg(target_arch = "wasm32")]
107120
pub fn build(self) -> Result<Database<MemoryStorage>, DatabaseError> {
108121
let storage = MemoryStorage::new();
109122

110-
Self::_build::<MemoryStorage>(storage, self.scala_functions, self.table_functions)
123+
Self::_build::<MemoryStorage>(
124+
storage,
125+
self.scala_functions,
126+
self.table_functions,
127+
self.histogram_buckets,
128+
)
111129
}
112130

113131
#[cfg(not(target_arch = "wasm32"))]
114132
pub fn build(self) -> Result<Database<RocksStorage>, DatabaseError> {
115133
let storage = RocksStorage::new(self.path)?;
116134

117-
Self::_build::<RocksStorage>(storage, self.scala_functions, self.table_functions)
135+
Self::_build::<RocksStorage>(
136+
storage,
137+
self.scala_functions,
138+
self.table_functions,
139+
self.histogram_buckets,
140+
)
118141
}
119142

120143
pub fn build_in_memory(self) -> Result<Database<MemoryStorage>, DatabaseError> {
121144
let storage = MemoryStorage::new();
122145

123-
Self::_build::<MemoryStorage>(storage, self.scala_functions, self.table_functions)
146+
Self::_build::<MemoryStorage>(
147+
storage,
148+
self.scala_functions,
149+
self.table_functions,
150+
self.histogram_buckets,
151+
)
124152
}
125153

126154
#[cfg(not(target_arch = "wasm32"))]
127155
pub fn build_optimistic(self) -> Result<Database<OptimisticRocksStorage>, DatabaseError> {
128156
let storage = OptimisticRocksStorage::new(self.path)?;
129157

130-
Self::_build::<OptimisticRocksStorage>(storage, self.scala_functions, self.table_functions)
158+
Self::_build::<OptimisticRocksStorage>(
159+
storage,
160+
self.scala_functions,
161+
self.table_functions,
162+
self.histogram_buckets,
163+
)
131164
}
132165

133166
fn _build<T: Storage>(
134167
storage: T,
135168
scala_functions: ScalaFunctions,
136169
table_functions: TableFunctions,
170+
histogram_buckets: Option<usize>,
137171
) -> Result<Database<T>, DatabaseError> {
172+
if matches!(histogram_buckets, Some(0)) {
173+
return Err(DatabaseError::InvalidValue(
174+
"histogram buckets must be >= 1".to_string(),
175+
));
176+
}
138177
let meta_cache = SharedLruCache::new(256, 8, RandomState::new())?;
139178
let table_cache = SharedLruCache::new(48, 4, RandomState::new())?;
140179
let view_cache = SharedLruCache::new(12, 4, RandomState::new())?;
@@ -149,6 +188,7 @@ impl DataBaseBuilder {
149188
table_cache,
150189
view_cache,
151190
optimizer_pipeline: default_optimizer_pipeline(),
191+
histogram_buckets,
152192
_p: Default::default(),
153193
}),
154194
})
@@ -260,6 +300,7 @@ pub(crate) struct State<S> {
260300
table_cache: TableCache,
261301
view_cache: ViewCache,
262302
optimizer_pipeline: HepOptimizerPipeline,
303+
histogram_buckets: Option<usize>,
263304
_p: PhantomData<S>,
264305
}
265306

@@ -312,11 +353,17 @@ impl<S: Storage> State<S> {
312353
/// Limit(1)
313354
/// Project(a,b)
314355
let source_plan = binder.bind(stmt)?;
315-
let best_plan = self
356+
let mut best_plan = self
316357
.optimizer_pipeline
317358
.instantiate(source_plan)
318359
.find_best(Some(&transaction.meta_loader(meta_cache)))?;
319360

361+
if let Operator::Analyze(op) = &mut best_plan.operator {
362+
if op.histogram_buckets.is_none() {
363+
op.histogram_buckets = self.histogram_buckets;
364+
}
365+
}
366+
320367
Ok(best_plan)
321368
}
322369

@@ -1036,4 +1083,17 @@ pub(crate) mod test {
10361083

10371084
Ok(())
10381085
}
1086+
1087+
#[test]
1088+
fn test_invalid_histogram_buckets() {
1089+
let temp_dir = TempDir::new().expect("unable to create temporary working directory");
1090+
let result = DataBaseBuilder::path(temp_dir.path())
1091+
.histogram_buckets(0)
1092+
.build();
1093+
1094+
assert!(matches!(
1095+
result,
1096+
Err(DatabaseError::InvalidValue(message)) if message == "histogram buckets must be >= 1"
1097+
));
1098+
}
10391099
}

src/execution/dml/analyze.rs

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pub struct Analyze {
4848
table_name: TableName,
4949
input: LogicalPlan,
5050
index_metas: Vec<IndexMetaRef>,
51+
histogram_buckets: Option<usize>,
5152
}
5253

5354
impl From<(AnalyzeOperator, LogicalPlan)> for Analyze {
@@ -56,6 +57,7 @@ impl From<(AnalyzeOperator, LogicalPlan)> for Analyze {
5657
AnalyzeOperator {
5758
table_name,
5859
index_metas,
60+
histogram_buckets,
5961
},
6062
input,
6163
): (AnalyzeOperator, LogicalPlan),
@@ -64,6 +66,7 @@ impl From<(AnalyzeOperator, LogicalPlan)> for Analyze {
6466
table_name,
6567
input,
6668
index_metas,
69+
histogram_buckets,
6770
}
6871
}
6972
}
@@ -79,6 +82,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze {
7982
table_name,
8083
mut input,
8184
index_metas,
85+
histogram_buckets,
8286
} = self;
8387

8488
let schema = input.output_schema().clone();
@@ -99,6 +103,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze {
99103
index_id: index.id,
100104
exprs: throw!(co, index.column_exprs(&table)),
101105
builder: HistogramBuilder::new(index, None),
106+
histogram_buckets,
102107
});
103108
}
104109

@@ -160,6 +165,7 @@ struct State {
160165
index_id: IndexId,
161166
exprs: Vec<ScalarExpression>,
162167
builder: HistogramBuilder,
168+
histogram_buckets: Option<usize>,
163169
}
164170

165171
impl Analyze {
@@ -177,15 +183,19 @@ impl Analyze {
177183
let mut active_index_paths = HashSet::new();
178184

179185
for State {
180-
index_id, builder, ..
186+
index_id,
187+
builder,
188+
histogram_buckets,
189+
..
181190
} in builders
182191
{
183192
let index_file = OsStr::new(&index_id.to_string()).to_os_string();
184193
let path = dir_path.join(&index_file);
185194
let temp_path = path.with_extension("tmp");
186195
let path_str: String = path.to_string_lossy().into();
187196

188-
let (histogram, sketch) = builder.build(DEFAULT_NUM_OF_BUCKETS)?;
197+
let (histogram, sketch) =
198+
builder.build(histogram_buckets.unwrap_or(DEFAULT_NUM_OF_BUCKETS))?;
189199
let meta = StatisticsMeta::new(histogram, sketch);
190200

191201
meta.to_file(&temp_path)?;
@@ -224,11 +234,15 @@ impl Analyze {
224234
let mut active_keys = HashSet::new();
225235

226236
for State {
227-
index_id, builder, ..
237+
index_id,
238+
builder,
239+
histogram_buckets,
240+
..
228241
} in builders
229242
{
230243
let key = format!("{prefix}/{index_id}");
231-
let (histogram, sketch) = builder.build(DEFAULT_NUM_OF_BUCKETS)?;
244+
let (histogram, sketch) =
245+
builder.build(histogram_buckets.unwrap_or(DEFAULT_NUM_OF_BUCKETS))?;
232246
let meta = StatisticsMeta::new(histogram, sketch);
233247
let encoded = meta.to_storage_string()?;
234248

@@ -300,7 +314,10 @@ mod test {
300314
fn test_statistics_meta() -> Result<(), DatabaseError> {
301315
let temp_dir = TempDir::new().expect("unable to create temporary working directory");
302316
let base_dir = require_statistics_base_dir();
303-
let kite_sql = DataBaseBuilder::path(temp_dir.path()).build()?;
317+
let buckets = 10;
318+
let kite_sql = DataBaseBuilder::path(temp_dir.path())
319+
.histogram_buckets(buckets)
320+
.build()?;
304321

305322
kite_sql
306323
.run("create table t1 (a int primary key, b int)")?
@@ -328,16 +345,19 @@ mod test {
328345

329346
assert_eq!(statistics_meta_pk_index.index_id(), 0);
330347
assert_eq!(statistics_meta_pk_index.histogram().values_len(), 101);
348+
assert_eq!(statistics_meta_pk_index.histogram().buckets_len(), buckets);
331349

332350
let statistics_meta_b_index = StatisticsMeta::from_file::<RocksTransaction>(&paths[1])?;
333351

334352
assert_eq!(statistics_meta_b_index.index_id(), 1);
335353
assert_eq!(statistics_meta_b_index.histogram().values_len(), 101);
354+
assert_eq!(statistics_meta_b_index.histogram().buckets_len(), buckets);
336355

337356
let statistics_meta_p_index = StatisticsMeta::from_file::<RocksTransaction>(&paths[2])?;
338357

339358
assert_eq!(statistics_meta_p_index.index_id(), 2);
340359
assert_eq!(statistics_meta_p_index.histogram().values_len(), 101);
360+
assert_eq!(statistics_meta_p_index.histogram().buckets_len(), buckets);
341361

342362
Ok(())
343363
}

src/planner/operator/analyze.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@ use kite_sql_serde_macros::ReferenceSerialization;
2020
pub struct AnalyzeOperator {
2121
pub table_name: TableName,
2222
pub index_metas: Vec<IndexMetaRef>,
23+
pub histogram_buckets: Option<usize>,
2324
}

0 commit comments

Comments (0)