Skip to content

Commit f291feb

Browse files
gavinchou and gavinchou authored
branch-4.1: [config](pick) Add max_bucket_num_per_partition config to limit bucket number #61576 #62286 (#62325)
## Summary

Pick PR #61576 and #62286 to branch-4.1, combining both changes into one commit.

## Changes

Add a new FE config `max_bucket_num_per_partition` to limit the maximum number of buckets when creating a table or adding a partition. Default value is 768.

1. Add `max_bucket_num_per_partition` config in Config.java, defaulting to 768
2. Add bucket number validation in `DistributionDescriptor.validate()` for the CREATE TABLE scenario
3. Add bucket number validation in `InternalCatalog.addPartition()` for the ALTER TABLE ADD PARTITION scenario
4. Add unit tests for the new validation logic
5. Add regression test configurations

## Behavior

- For user-specified buckets (CREATE TABLE / ALTER TABLE ADD PARTITION): if the bucket number exceeds this limit, the operation will be rejected with a helpful error message
- For the auto-bucket feature (Dynamic Partition): the bucket number is capped by `autobucket_max_buckets` automatically, no change
- Set to 0 or a negative value to disable this limit

## Test

- [x] FE unit tests passed locally (DistributionDescriptorTest)
- [x] FE build passed locally

## Related Issue

close #61576
close #62286

---------

Co-authored-by: gavinchou <gavinchou@apache.org>
1 parent 01fe81b commit f291feb

7 files changed

Lines changed: 167 additions & 2 deletions

File tree

fe/fe-common/src/main/java/org/apache/doris/common/Config.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,6 +2833,18 @@ public class Config extends ConfigBase {
28332833
})
28342834
public static int autobucket_max_buckets = 128;
28352835

2836+
@ConfField(description = {"限制创建表或添加分区时的最大分桶数。默认值为 768。"
2837+
+ "1. 对于用户指定的分桶数(CREATE TABLE / ALTER TABLE ADD PARTITION):如果分桶数超过此限制,操作将被拒绝并显示错误消息。"
2838+
+ "2. 对于自动分桶功能(动态分区):分桶数将自动限制在 autobucket_max_buckets。"
2839+
+ "设置为 0 或负值可禁用用户指定分桶数的限制。",
2840+
"Maximum number of buckets when creating a table or adding a partition. Defaults to 768."
2841+
+ "1. For user-specified buckets (CREATE TABLE / ALTER TABLE ADD PARTITION): "
2842+
+ "if bucket number exceeds this limit, the operation will be rejected with an error message. "
2843+
+ "2. For auto-bucket feature (Dynamic Partition): "
2844+
+ "bucket number will be capped at autobucket_max_buckets automatically. "
2845+
+ "Set to 0 or negative value to disable this limit for user-specified buckets."})
2846+
public static int max_bucket_num_per_partition = 768;
2847+
28362848
@ConfField(description = {"单个 FE 的 Arrow Flight Server 的最大连接数。",
28372849
"Maximal number of connections of Arrow Flight Server per FE."})
28382850
public static int arrow_flight_max_connections = 4096;

fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,16 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa
16051605
if (hashDistributionInfo.getBucketNum() <= 0) {
16061606
throw new DdlException("Cannot assign hash distribution buckets less than 1");
16071607
}
1608+
if (Config.max_bucket_num_per_partition > 0
1609+
&& hashDistributionInfo.getBucketNum() > Config.max_bucket_num_per_partition) {
1610+
throw new DdlException(String.format(
1611+
"Number of buckets (%d) exceeds the maximum allowed value (%d). "
1612+
+ "Generally, a large number of buckets is not needed. "
1613+
+ "If you have a specific use case requiring more buckets, "
1614+
+ "please review your schema design or modify the FE config "
1615+
+ "'max_bucket_num_per_partition' to adjust this limit.",
1616+
hashDistributionInfo.getBucketNum(), Config.max_bucket_num_per_partition));
1617+
}
16081618
if (!hashDistributionInfo.sameDistributionColumns((HashDistributionInfo) defaultDistributionInfo)) {
16091619
throw new DdlException("Cannot assign hash distribution with different distribution cols. "
16101620
+ "new is: " + hashDistributionInfo.getDistributionColumns() + " default is: "
@@ -1615,6 +1625,16 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa
16151625
if (randomDistributionInfo.getBucketNum() <= 0) {
16161626
throw new DdlException("Cannot assign random distribution buckets less than 1");
16171627
}
1628+
if (Config.max_bucket_num_per_partition > 0
1629+
&& randomDistributionInfo.getBucketNum() > Config.max_bucket_num_per_partition) {
1630+
throw new DdlException(String.format(
1631+
"Number of buckets (%d) exceeds the maximum allowed value (%d). "
1632+
+ "Generally, a large number of buckets is not needed. "
1633+
+ "If you have a specific use case requiring more buckets, "
1634+
+ "please review your schema design or modify the FE config "
1635+
+ "'max_bucket_num_per_partition' to adjust this limit.",
1636+
randomDistributionInfo.getBucketNum(), Config.max_bucket_num_per_partition));
1637+
}
16181638
}
16191639
} else {
16201640
// make sure partition-dristribution-info is deep copied from default-distribution-info

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.doris.analysis.RandomDistributionDesc;
2323
import org.apache.doris.catalog.AggregateType;
2424
import org.apache.doris.catalog.KeysType;
25+
import org.apache.doris.common.Config;
2526
import org.apache.doris.nereids.exceptions.AnalysisException;
2627

2728
import com.google.common.collect.Lists;
@@ -76,6 +77,20 @@ public void validate(Map<String, ColumnDefinition> columnMap, KeysType keysType)
7677
throw new AnalysisException(isHash ? "Number of hash distribution should be greater than zero."
7778
: "Number of random distribution should be greater than zero.");
7879
}
80+
81+
// Check bucket max limit for non-auto-bucket cases
82+
// auto bucket is limited by autobucket_max_buckets during calculation
83+
if (!isAutoBucket && Config.max_bucket_num_per_partition > 0
84+
&& bucketNum > Config.max_bucket_num_per_partition) {
85+
throw new AnalysisException(String.format(
86+
"Number of buckets (%d) exceeds the maximum allowed value (%d). "
87+
+ "Generally, a large number of buckets is not needed. "
88+
+ "If you have a specific use case requiring more buckets, "
89+
+ "please review your schema design or modify the FE config "
90+
+ "'max_bucket_num_per_partition' to adjust this limit.",
91+
bucketNum, Config.max_bucket_num_per_partition));
92+
}
93+
7994
if (isHash) {
8095
Set<String> colSet = Sets.newHashSet(cols);
8196
if (colSet.size() != cols.size()) {

fe/fe-core/src/test/java/org/apache/doris/clone/DecommissionTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public void testDecommissionBackend() throws Exception {
144144
// test colocate tablet repair
145145
String createStr = "create table test.tbl1\n"
146146
+ "(k1 date, k2 int)\n"
147-
+ "distributed by hash(k2) buckets 2400\n"
147+
+ "distributed by hash(k2) buckets 64\n"
148148
+ "properties\n"
149149
+ "(\n"
150150
+ " \"replication_num\" = \"1\"\n"
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.plans.commands.info;

import org.apache.doris.catalog.KeysType;
import org.apache.doris.common.Config;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.types.IntegerType;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.Map;

/**
 * Unit tests for {@link DistributionDescriptor#validate}, covering the
 * {@code max_bucket_num_per_partition} upper limit and the basic
 * "bucket number must be positive" precondition.
 */
public class DistributionDescriptorTest {

    /** Builds the two non-nullable INT columns ("col1", "col2") shared by all cases. */
    private Map<String, ColumnDefinition> createColumnMap() {
        Map<String, ColumnDefinition> columns = Maps.newHashMap();
        for (String name : Lists.newArrayList("col1", "col2")) {
            columns.put(name, new ColumnDefinition(name, IntegerType.INSTANCE, false));
        }
        return columns;
    }

    /** Asserts that validate() rejects {@code desc} with a "greater than zero" error. */
    private void assertRejectsNonPositiveBuckets(DistributionDescriptor desc,
            Map<String, ColumnDefinition> columnMap) {
        AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
                () -> desc.validate(columnMap, KeysType.DUP_KEYS));
        Assertions.assertTrue(ex.getMessage().contains("greater than zero"));
    }

    @Test
    public void testBucketNumMaxLimit() {
        Map<String, ColumnDefinition> columnMap = createColumnMap();
        // The config is static; remember it so other tests are not affected.
        int savedLimit = Config.max_bucket_num_per_partition;
        try {
            // Case 1: bucket count within the limit passes validation.
            Config.max_bucket_num_per_partition = 100;
            new DistributionDescriptor(true, false, 50, Lists.newArrayList("col1"))
                    .validate(columnMap, KeysType.DUP_KEYS);

            // Case 2: bucket count above the limit is rejected with a helpful message.
            DistributionDescriptor hashOverLimit = new DistributionDescriptor(
                    true, false, 150, Lists.newArrayList("col1"));
            AnalysisException hashEx = Assertions.assertThrows(AnalysisException.class,
                    () -> hashOverLimit.validate(columnMap, KeysType.DUP_KEYS));
            Assertions.assertTrue(
                    hashEx.getMessage().contains("exceeds the maximum allowed value (100)"));
            Assertions.assertTrue(hashEx.getMessage().contains("max_bucket_num_per_partition"));

            // Case 3: a non-positive limit disables the check entirely.
            Config.max_bucket_num_per_partition = 0;
            new DistributionDescriptor(true, false, 10000, Lists.newArrayList("col1"))
                    .validate(columnMap, KeysType.DUP_KEYS);

            // Case 4: auto-bucket descriptors are capped elsewhere
            // (autobucket_max_buckets), so this config must not reject them.
            Config.max_bucket_num_per_partition = 10;
            new DistributionDescriptor(true, true, 1000, Lists.newArrayList("col1"))
                    .validate(columnMap, KeysType.DUP_KEYS);

            // Case 5: random distribution honors the same limit.
            Config.max_bucket_num_per_partition = 100;
            new DistributionDescriptor(false, false, 50, Lists.newArrayList())
                    .validate(columnMap, KeysType.DUP_KEYS);

            DistributionDescriptor randomOverLimit = new DistributionDescriptor(
                    false, false, 150, Lists.newArrayList());
            AnalysisException randomEx = Assertions.assertThrows(AnalysisException.class,
                    () -> randomOverLimit.validate(columnMap, KeysType.DUP_KEYS));
            Assertions.assertTrue(
                    randomEx.getMessage().contains("exceeds the maximum allowed value (100)"));
        } finally {
            Config.max_bucket_num_per_partition = savedLimit;
        }
    }

    @Test
    public void testBucketNumZeroOrNegative() {
        Map<String, ColumnDefinition> columnMap = createColumnMap();

        // Hash distribution: zero and negative bucket counts are both rejected.
        assertRejectsNonPositiveBuckets(
                new DistributionDescriptor(true, false, 0, Lists.newArrayList("col1")), columnMap);
        assertRejectsNonPositiveBuckets(
                new DistributionDescriptor(true, false, -1, Lists.newArrayList("col1")), columnMap);

        // Random distribution: a zero bucket count is rejected as well.
        assertRejectsNonPositiveBuckets(
                new DistributionDescriptor(false, false, 0, Lists.newArrayList()), columnMap);
    }
}

regression-test/pipeline/cloud_p0/conf/fe_custom.conf

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,5 @@ enable_advance_next_id = true
4848
check_table_lock_leaky = true
4949
enable_outfile_to_local=true
5050

51-
enable_notify_be_after_load_txn_commit=true
51+
enable_notify_be_after_load_txn_commit=true
52+
max_bucket_num_per_partition=512

regression-test/pipeline/p0/conf/fe.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,4 @@ max_query_profile_num = 2000
9292
max_spilled_profile_num = 2000
9393

9494
check_table_lock_leaky=true
95+
max_bucket_num_per_partition=512

0 commit comments

Comments (0)