Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1670,7 +1670,7 @@ public class Config extends ConfigBase {
* The number is determined by "start" and "end" in the dynamic partition parameters.
*/
@ConfField(mutable = true, masterOnly = true)
public static int max_dynamic_partition_num = 500;
public static int max_dynamic_partition_num = 20000;

/**
* Used to limit the maximum number of partitions that can be created when creating multi partition,
Expand Down Expand Up @@ -2966,9 +2966,8 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = true, description = {
"对于自动分区表,防止用户意外创建大量分区,每个 OLAP 表允许的分区数量为`max_auto_partition_num`。默认 2000。",
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Chinese description still says the default is 2000 (“默认 2000。”) but the actual default was changed to 20000. Update the Chinese string to match the new default to avoid misleading operators.

Suggested change
"对于自动分区表,防止用户意外创建大量分区,每个 OLAP 表允许的分区数量为`max_auto_partition_num`。默认 2000。",
"对于自动分区表,防止用户意外创建大量分区,每个 OLAP 表允许的分区数量为`max_auto_partition_num`。默认 20000。",

Copilot uses AI. Check for mistakes.
"For auto-partitioned tables to prevent users from accidentally creating a large number of partitions, "
+ "the number of partitions allowed per OLAP table is `max_auto_partition_num`. Default 2000."
})
public static int max_auto_partition_num = 2000;
+ "the number of partitions allowed per OLAP table is `max_auto_partition_num`. Default 20000."})
public static int max_auto_partition_num = 20000;

@ConfField(mutable = true, masterOnly = true, description = {
"Partition rebalance 方式下各个 BE 的 tablet 数最大差值,小于该值时,会诊断为已均衡",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.metric.MetricRepo;
import org.apache.doris.policy.StoragePolicy;
import org.apache.doris.resource.Tag;
import org.apache.doris.thrift.TStorageMedium;
Expand Down Expand Up @@ -641,10 +642,20 @@ public static Map<String, String> analyzeDynamicPartition(Map<String, String> pr
}
expectCreatePartitionNum = (long) end - start;

if (!isReplay && hasEnd && (expectCreatePartitionNum > Config.max_dynamic_partition_num)
int dynamicPartitionLimit = Config.max_dynamic_partition_num;
if (!isReplay && hasEnd
&& Boolean.parseBoolean(analyzedProperties.getOrDefault(DynamicPartitionProperty.ENABLE, "true"))) {
throw new DdlException("Too many dynamic partitions: "
+ expectCreatePartitionNum + ". Limit: " + Config.max_dynamic_partition_num);
if (expectCreatePartitionNum > dynamicPartitionLimit) {
throw new DdlException("Too many dynamic partitions: "
+ expectCreatePartitionNum + ". Limit: " + dynamicPartitionLimit);
} else if (expectCreatePartitionNum > dynamicPartitionLimit * 8L / 10) {
LOG.warn("Dynamic partition count {} is approaching limit {} (>80%)."
+ " Consider increasing max_dynamic_partition_num.",
expectCreatePartitionNum, dynamicPartitionLimit);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L);
}
Comment on lines +652 to +657
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the auto-partition path, this will warn and increment the counter on every analysis call above the 80% threshold, which can be very frequent (DDL validations and retries). Consider throttling/deduping, or incrementing only on threshold crossing to keep logs/metrics actionable and avoid alert fatigue.

Copilot uses AI. Check for mistakes.
}
}

if (properties.containsKey(DynamicPartitionProperty.START_DAY_OF_MONTH)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,10 @@ public final class MetricRepo {
public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES;
public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES;

// Partition near-limit warnings
public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT;
public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT;

// Agent task
public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL;
public static AutoMappedMetric<LongCounterMetric> COUNTER_AGENT_TASK_TOTAL;
Expand Down Expand Up @@ -1002,6 +1006,16 @@ public Integer getValue() {
GAUGE_AVG_TABLET_SIZE_BYTES = new GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L);
DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES);

// Partition near-limit warning counters
COUNTER_AUTO_PARTITION_NEAR_LIMIT = new LongCounterMetric("auto_partition_near_limit_count",
MetricUnit.NOUNIT,
"number of times auto partition count exceeded 80% of max_auto_partition_num");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT);
COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new LongCounterMetric("dynamic_partition_near_limit_count",
MetricUnit.NOUNIT,
"number of times dynamic partition count exceeded 80% of max_dynamic_partition_num");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT);

COUNTER_AGENT_TASK_REQUEST_TOTAL = new LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT,
"total agent batch task request send to BE");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_AGENT_TASK_REQUEST_TOTAL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
import org.apache.doris.load.routineload.RoutineLoadManager;
import org.apache.doris.master.MasterImpl;
import org.apache.doris.meta.MetaContext;
import org.apache.doris.metric.MetricRepo;
import org.apache.doris.mysql.privilege.AccessControllerManager;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
Expand Down Expand Up @@ -3795,15 +3796,24 @@ public TCreatePartitionResult createPartition(TCreatePartitionRequest request) t
// Check the partition number limit. Partitions in addPartitionClauseMap may duplicate existing partitions,
// so summing existing and requested partitions could yield a false positive. Therefore we check the limit
// AFTER adding the new partitions, using the table's ACTUAL partition count.
if (olapTable.getPartitionNum() > Config.max_auto_partition_num) {
int partitionNum = olapTable.getPartitionNum();
int autoPartitionLimit = Config.max_auto_partition_num;
if (partitionNum > autoPartitionLimit) {
String errorMessage = String.format(
"partition numbers %d exceeded limit of variable max_auto_partition_num %d",
olapTable.getPartitionNum(), Config.max_auto_partition_num);
partitionNum, autoPartitionLimit);
LOG.warn(errorMessage);
errorStatus.setErrorMsgs(Lists.newArrayList(errorMessage));
result.setStatus(errorStatus);
LOG.warn("send create partition error status: {}", result);
return result;
} else if (partitionNum > autoPartitionLimit * 8 / 10) {
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`autoPartitionLimit * 8 / 10` is computed with `int` arithmetic and can overflow if `max_auto_partition_num` is configured to a large value (it is a mutable config). Use `long` arithmetic (cast before multiplying), or compare `(long) partitionNum * 10 > (long) autoPartitionLimit * 8`, to avoid overflow and truncation pitfalls.

Suggested change
} else if (partitionNum > autoPartitionLimit * 8 / 10) {
} else if ((long) partitionNum * 10 > (long) autoPartitionLimit * 8) {

Copilot uses AI. Check for mistakes.
LOG.warn("Table {}.{} auto partition count {} is approaching limit {} (>80%)."
+ " Consider increasing max_auto_partition_num.",
db.getFullName(), olapTable.getName(), partitionNum, autoPartitionLimit);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
}
Comment on lines +3810 to +3816
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This warning (and counter increment) will trigger on every createPartition call once the table is above the 80% threshold, which can produce noisy logs and rapidly increasing counters in busy clusters. Consider adding throttling/deduping (e.g., log at most once per table per time window, or only when crossing the threshold) and similarly gate the metric increment to threshold-crossing events rather than per-request.

Suggested change
} else if (partitionNum > autoPartitionLimit * 8 / 10) {
LOG.warn("Table {}.{} auto partition count {} is approaching limit {} (>80%)."
+ " Consider increasing max_auto_partition_num.",
db.getFullName(), olapTable.getName(), partitionNum, autoPartitionLimit);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
}
} else {
// Only emit the warning and increment the metric when crossing the 80% threshold.
// Estimate the partition count before this request by subtracting the number of
// partitions requested to be added. This avoids noisy logs/metrics when the table
// is already above the threshold.
int prevPartitionNumEstimate = partitionNum - addPartitionClauseMap.size();
if (prevPartitionNumEstimate < 0) {
prevPartitionNumEstimate = 0;
}
int threshold80 = autoPartitionLimit * 8 / 10;
if (partitionNum > threshold80 && prevPartitionNumEstimate <= threshold80) {
LOG.warn("Table {}.{} auto partition count {} is approaching limit {} (>80%)."
+ " Consider increasing max_auto_partition_num.",
db.getFullName(), olapTable.getName(), partitionNum, autoPartitionLimit);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
}
}

Copilot uses AI. Check for mistakes.
}

// build partition & tablets
Expand Down
Loading