From dd665b3413c044c56ab9a7e9d4d51f3e5fc61bd1 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 10 May 2024 21:22:54 -0700 Subject: [PATCH 001/257] autorepair v2 framework --- .../pages/managing/operating/metrics.adoc | 50 ++ .../org/apache/cassandra/config/Config.java | 4 + .../cassandra/config/DatabaseDescriptor.java | 16 + .../apache/cassandra/config/DurationSpec.java | 3 +- .../cassandra/cql3/UntypedResultSet.java | 7 + .../statements/ModificationStatement.java | 2 +- .../statements/schema/TableAttributes.java | 8 + .../cassandra/metrics/AutoRepairMetrics.java | 165 ++++ .../metrics/AutoRepairMetricsManager.java | 34 + .../repair/RepairMessageVerbHandler.java | 7 + .../repair/autorepair/AutoRepair.java | 402 +++++++++ .../repair/autorepair/AutoRepairConfig.java | 408 +++++++++ .../repair/autorepair/AutoRepairKeyspace.java | 84 ++ .../repair/autorepair/AutoRepairState.java | 347 ++++++++ .../repair/autorepair/AutoRepairUtils.java | 812 ++++++++++++++++++ .../DefaultAutoRepairTokenSplitter.java | 88 ++ .../IAutoRepairTokenRangeSplitter.java | 31 + .../cassandra/schema/AutoRepairParams.java | 145 ++++ .../cassandra/schema/SchemaKeyspace.java | 14 +- .../apache/cassandra/schema/TableParams.java | 59 +- .../service/ActiveRepairService.java | 33 + .../cassandra/service/AutoRepairService.java | 147 ++++ .../service/AutoRepairServiceMBean.java | 60 ++ .../cassandra/service/CassandraDaemon.java | 2 + .../cassandra/service/StorageService.java | 15 +- .../org/apache/cassandra/tools/NodeProbe.java | 85 ++ .../org/apache/cassandra/tools/NodeTool.java | 2 + .../tools/nodetool/GetAutoRepairConfig.java | 77 ++ .../tools/nodetool/SetAutoRepairConfig.java | 156 ++++ .../apache/cassandra/utils/FBUtilities.java | 7 + test/unit/org/apache/cassandra/Util.java | 13 + .../config/YamlConfigurationLoaderTest.java | 31 +- .../autorepair/AutoRepairConfigTest.java | 453 ++++++++++ .../autorepair/AutoRepairKeyspaceTest.java | 64 ++ .../AutoRepairParameterizedTest.java | 583 
+++++++++++++ .../AutoRepairStateFactoryTest.java | 52 ++ .../autorepair/AutoRepairStateTest.java | 382 ++++++++ .../repair/autorepair/AutoRepairTest.java | 119 +++ .../autorepair/AutoRepairUtilsTest.java | 479 +++++++++++ .../service/ActiveRepairServiceTest.java | 49 +- 40 files changed, 5474 insertions(+), 21 deletions(-) create mode 100644 src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java create mode 100644 src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java create mode 100644 src/java/org/apache/cassandra/schema/AutoRepairParams.java create mode 100644 src/java/org/apache/cassandra/service/AutoRepairService.java create mode 100644 src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java create mode 100644 
test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 4f3d66652c24..35a0026ac1db 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1081,6 +1081,56 @@ partitions processed per logged batch partitions processed per unlogged batch |=== +== Automated Repair Metrics + +Metrics specific to automated repair. + +Reported name format: + +*Metric Name*:: +`org.apache.cassandra.metrics.AutoRepairMetrics.` +*JMX MBean*:: +`org.apache.cassandra.metrics:type=AutoRepairMetrics name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|RepairsInProgress |Gauge |Repair is in progress +on the node + +|NodeRepairTimeInSec |Gauge |Time taken to repair +the node + +|ClusterRepairTimeInSec |Gauge |Time taken to repair +the entire Cassandra cluster + +|SkippedTablesCount |Gauge |Number of tables skipped +on the node + +|LongestUnrepairedSec |Gauge |Time since the last repair +ran on the node + +|FailedTablesCount |Gauge |Number of tables encountered +failure during repair on the node + +|TotalMVTablesConsideredForRepair |Gauge |Number of materialized +views considered on the node + +|TotalDisabledRepairTables |Gauge |Number of tables on which +the automated repair has been disabled on the node + +|RepairTurnMyTurn |Counter |Represents the node's turn to repair + +|RepairTurnMyTurnDueToPriority |Counter |Represents the node's turn to repair +due to priority set in the automated repair + +|RepairTurnMyTurnForceRepair |Counter |Represents the node's turn to repair +due to force repair set in the automated repair + + +|=== + + == JVM Metrics JVM
metrics such as memory and garbage collection statistics can either diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 65179fcd7e91..32f195c43941 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -32,6 +32,8 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; + +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -367,6 +369,7 @@ public MemtableOptions() // The number of executors to use for building secondary indexes public volatile int concurrent_index_builders = 2; + public volatile double incremental_repair_disk_headroom_reject_ratio = 0.2; // at least 20% of disk must be unused to run incremental repair /** * @deprecated retry support removed on CASSANDRA-10992 @@ -991,6 +994,7 @@ public static void setClientMode(boolean clientMode) public volatile boolean password_validator_reconfiguration_enabled = true; public volatile CustomGuardrailConfig password_validator = new CustomGuardrailConfig(); + public volatile AutoRepairConfig auto_repair = new AutoRepairConfig(); /** * The variants of paxos implementation and semantics supported by Cassandra. 
diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 0439dbb00c5f..abc3ca58787a 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -113,6 +113,7 @@ import org.apache.cassandra.locator.ReconnectableSnitchHelper; import org.apache.cassandra.locator.SeedProvider; import org.apache.cassandra.locator.SnitchAdapter; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.security.AbstractCryptoProvider; import org.apache.cassandra.security.EncryptionContext; import org.apache.cassandra.security.JREProvider; @@ -5899,4 +5900,19 @@ public static boolean getAccordEphemeralReadEnabledEnabled() { return conf.accord.ephemeralReadEnabled; } + + public static AutoRepairConfig getAutoRepairConfig() + { + return conf.auto_repair; + } + + public static double getIncrementalRepairDiskHeadroomRejectRatio() + { + return conf.incremental_repair_disk_headroom_reject_ratio; + } + + public static void setIncrementalRepairDiskHeadroomRejectRatio(double value) + { + conf.incremental_repair_disk_headroom_reject_ratio = value; + } } diff --git a/src/java/org/apache/cassandra/config/DurationSpec.java b/src/java/org/apache/cassandra/config/DurationSpec.java index 01a7d70b5f46..ee35de58e81b 100644 --- a/src/java/org/apache/cassandra/config/DurationSpec.java +++ b/src/java/org/apache/cassandra/config/DurationSpec.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.config; +import java.io.Serializable; import java.time.Duration; import java.util.Arrays; import java.util.Objects; @@ -42,7 +43,7 @@ * users the opportunity to be able to provide config with a unit of their choice in cassandra.yaml as per the available * options. 
(CASSANDRA-15234) */ -public abstract class DurationSpec +public abstract class DurationSpec implements Serializable { /** * The Regexp used to parse the duration provided as String. diff --git a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java index 6d2848dad2d2..872ba7cf0d1e 100644 --- a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java +++ b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java @@ -382,6 +382,13 @@ public long getLong(String column) return LongType.instance.compose(data.get(column)); } + // this function will return the default value if the row doesn't have that column or the column data is null + // This function is used to avoid the nullpointerexception + public long getLong(String column, long ifNull) { + ByteBuffer bytes = data.get(column); + return bytes == null ? ifNull : LongType.instance.compose(bytes); + } + public Set getSet(String column, AbstractType type) { ByteBuffer raw = data.get(column); diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index cecf839e8be0..33b4b676fd95 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -141,7 +141,7 @@ public abstract class ModificationStatement implements CQLStatement.SingleKeyspa public static final String CUSTOM_EXPRESSIONS_NOT_ALLOWED = "Custom index expressions cannot be used in WHERE clauses for UPDATE or DELETE statements"; - private static final ColumnIdentifier CAS_RESULT_COLUMN = new ColumnIdentifier("[applied]", false); + public static final ColumnIdentifier CAS_RESULT_COLUMN = new ColumnIdentifier("[applied]", false); protected final StatementType type; diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java 
b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 2e643e5472f7..62846cffa8ec 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -27,6 +27,8 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.schema.AutoRepairParams; import org.apache.cassandra.schema.CachingParams; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.CompressionParams; @@ -196,6 +198,12 @@ private TableParams build(TableParams.Builder builder) if (hasOption(Option.TRANSACTIONAL_MIGRATION_FROM)) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); + if (hasOption(Option.AUTOMATED_REPAIR_FULL)) + builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, getMap(Option.AUTOMATED_REPAIR_FULL))); + + if (hasOption(Option.AUTOMATED_REPAIR_INCREMENTAL)) + builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.AUTOMATED_REPAIR_INCREMENTAL))); + return builder.build(); } diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java new file mode 100644 index 000000000000..cd46c462fb66 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.metrics; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.google.common.annotations.VisibleForTesting; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepair; + +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +/** + * Metrics related to AutoRepair. + */ +public class AutoRepairMetrics +{ + public Gauge repairsInProgress; + public Gauge nodeRepairTimeInSec; + public Gauge clusterRepairTimeInSec; + public Gauge skippedTablesCount; + public Gauge longestUnrepairedSec; + public Gauge failedTablesCount; + public Counter repairTurnMyTurn; + public Counter repairTurnMyTurnDueToPriority; + public Counter repairTurnMyTurnForceRepair; + public Gauge totalMVTablesConsideredForRepair; + public Gauge totalDisabledRepairTables; + + public AutoRepairMetrics(RepairType repairType) + { + AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); + + repairsInProgress = Metrics.register(factory.createMetricName("RepairsInProgress"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).isRepairInProgress() ? 
1 : 0; + } + }); + + nodeRepairTimeInSec = Metrics.register(factory.createMetricName("NodeRepairTimeInSec"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getNodeRepairTimeInSec(); + } + }); + + clusterRepairTimeInSec = Metrics.register(factory.createMetricName("ClusterRepairTimeInSec"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getClusterRepairTimeInSec(); + } + }); + + skippedTablesCount = Metrics.register(factory.createMetricName("SkippedTablesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getRepairSkippedTablesCount(); + } + }); + + longestUnrepairedSec = Metrics.register(factory.createMetricName("LongestUnrepairedSec"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getLongestUnrepairedSec(); + } + }); + + failedTablesCount = Metrics.register(factory.createMetricName("FailedTablesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getRepairFailedTablesCount(); + } + }); + + repairTurnMyTurn = Metrics.counter(factory.createMetricName("RepairTurnMyTurn")); + repairTurnMyTurnDueToPriority = Metrics.counter(factory.createMetricName("RepairTurnMyTurnDueToPriority")); + repairTurnMyTurnForceRepair = Metrics.counter(factory.createMetricName("RepairTurnMyTurnForceRepair")); + + totalMVTablesConsideredForRepair = Metrics.register(factory.createMetricName("TotalMVTablesConsideredForRepair"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getTotalMVTablesConsideredForRepair(); + } + }); + + totalDisabledRepairTables = Metrics.register(factory.createMetricName("TotalDisabledRepairTables"), new Gauge() + { + public Integer getValue() + { + return 
AutoRepair.instance.getRepairState(repairType).getTotalDisabledTablesRepairCount(); + } + }); + } + + public void recordTurn(AutoRepairUtils.RepairTurn turn) + { + switch (turn) + { + case MY_TURN: + repairTurnMyTurn.inc(); + break; + case MY_TURN_FORCE_REPAIR: + repairTurnMyTurnForceRepair.inc(); + break; + case MY_TURN_DUE_TO_PRIORITY: + repairTurnMyTurnDueToPriority.inc(); + break; + default: + throw new RuntimeException(String.format("Unrecoginized turn: %s", turn.name())); + } + } + + @VisibleForTesting + protected static class AutoRepairMetricsFactory implements MetricNameFactory + { + private static final String TYPE = "AutoRepair"; + @VisibleForTesting + protected final String repairType; + + protected AutoRepairMetricsFactory(RepairType repairType) + { + this.repairType = repairType.toString().toLowerCase(); + } + + @Override + public CassandraMetricsRegistry.MetricName createMetricName(String metricName) + { + StringBuilder mbeanName = new StringBuilder(); + mbeanName.append(DefaultNameFactory.GROUP_NAME).append(':'); + mbeanName.append("type=").append(TYPE); + mbeanName.append(",name=").append(metricName); + mbeanName.append(",repairType=").append(repairType); + + StringBuilder scope = new StringBuilder(); + scope.append("repairType=").append(repairType); + + return new CassandraMetricsRegistry.MetricName(DefaultNameFactory.GROUP_NAME, TYPE.toLowerCase(), + metricName, scope.toString(), mbeanName.toString()); + } + } +} diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java new file mode 100644 index 000000000000..e293945c9846 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class AutoRepairMetricsManager +{ + private static final Map metrics = new ConcurrentHashMap<>(); + + public static AutoRepairMetrics getMetrics(RepairType repairType) + { + return metrics.computeIfAbsent(repairType, k -> new AutoRepairMetrics(repairType)); + } +} diff --git a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java index 72252f56ab2d..ea9742609fed 100644 --- a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java +++ b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java @@ -123,6 +123,13 @@ public void doVerb(final Message message) sendFailureResponse(message); return; } + if (!ActiveRepairService.verifyDiskHeadroomThreshold(prepareMessage.parentRepairSession, prepareMessage.previewKind, prepareMessage.isIncremental)) + { + // error is logged in verifyDiskHeadroomThreshold + state.phase.fail("Not enough disk headroom to perform incremental repair"); + sendFailureResponse(message); + return; + } List columnFamilyStores = new ArrayList<>(prepareMessage.tableIds.size()); for (TableId tableId : prepareMessage.tableIds) diff --git 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java new file mode 100644 index 000000000000..f6a08e73b16d --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -0,0 +1,402 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; + +import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.Pair; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.concurrent.NamedThreadFactory; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; + +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; + +public class AutoRepair +{ + // Initial delay for repair session to start after setup + final static long INITIAL_REPAIR_DELAY_SEC = 30; + + private static final Logger 
logger = LoggerFactory.getLogger(AutoRepair.class); + + @VisibleForTesting + protected static Supplier timeFunc = System::currentTimeMillis; + + public static AutoRepair instance = new AutoRepair(); + + @VisibleForTesting + protected final Map repairExecutors; + @VisibleForTesting + protected final Map repairStates; + + protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); + + + @VisibleForTesting + protected AutoRepair() + { + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); + repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); + tokenRangeSplitters.put(repairType, FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); + } + } + + public void setup() + { + verifyIsSafeToEnable(); + + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + AutoRepairService.setup(); + AutoRepairUtils.setup(); + + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + repairExecutors.get(repairType).scheduleWithFixedDelay( + () -> repair(repairType, 60000), + INITIAL_REPAIR_DELAY_SEC, + config.getRepairCheckInterval().toSeconds(), + TimeUnit.SECONDS); + } + } + + @VisibleForTesting + protected void verifyIsSafeToEnable() + { + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + if (config.isAutoRepairEnabled(AutoRepairConfig.RepairType.incremental) && + (DatabaseDescriptor.getMaterializedViewsEnabled() || DatabaseDescriptor.isCDCEnabled())) + throw new ConfigurationException("Cannot enable incremental repair with materialized views or CDC 
enabled"); + } + + // repairAsync runs a repair session of the given type asynchronously. + public void repairAsync(AutoRepairConfig.RepairType repairType, long millisToWait) + { + repairExecutors.get(repairType).submit(() -> repair(repairType, millisToWait)); + } + + // repair runs a repair session of the given type synchronously. + public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (!config.isAutoRepairEnabled(repairType)) + { + logger.debug("Auto-repair is disabled for repair type {}", repairType); + return; + } + + + AutoRepairState repairState = repairStates.get(repairType); + + try + { + String localDC = DatabaseDescriptor.getLocalDataCenter(); + if (config.getIgnoreDCs(repairType).contains(localDC)) + { + logger.info("Not running repair as this node belongs to datacenter {}", localDC); + return; + } + + // refresh the longest unrepaired node + repairState.setLongestUnrepairedNode(AutoRepairUtils.getHostWithLongestUnrepairTime(repairType)); + + //consistency level to use for local query + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, myId); + if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == MY_TURN_FORCE_REPAIR) + { + repairState.recordTurn(turn); + // For normal auto repair, we will use primary range only repairs (Repair with -pr option). + // For some cases, we may set the auto_repair_primary_token_range_only flag to false then we will do repair + // without -pr. We may also do force repair for certain node that we want to repair all the data on one node + // When doing force repair, we want to repair without -pr. 
+ boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) + && turn != MY_TURN_FORCE_REPAIR; + repairState.setTotalTablesConsideredForRepair(0); + if (repairState.getLastRepairTime() != 0) + { + /** check if it is too soon to run repair. one of the reason we + * should not run frequent repair is because repair triggers + * memtable flush + */ + long timeElapsedSinceLastRepair = TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - repairState.getLastRepairTime()); + if (timeElapsedSinceLastRepair < config.getRepairMinInterval(repairType).toSeconds()) + { + logger.info("Too soon to run repair, last repair was done {} seconds ago", + timeElapsedSinceLastRepair); + return; + } + } + + long startTime = timeFunc.get(); + logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, + config.getRepairPrimaryTokenRangeOnly(repairType)); + AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); + + repairState.setRepairKeyspaceCount(0); + repairState.setRepairTableSuccessCount(0); + repairState.setRepairFailedTablesCount(0); + repairState.setRepairSkippedTablesCount(0); + repairState.setRepairInProgress(true); + repairState.setTotalMVTablesConsideredForRepair(0); + for (Keyspace keyspace : Keyspace.all()) + { + Tables tables = keyspace.getMetadata().tables; + Iterator iter = tables.iterator(); + String keyspaceName = keyspace.getName(); + if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) + { + continue; + } + + repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); + List tablesToBeRepaired = new ArrayList<>(); + while (iter.hasNext()) + { + repairState.setTotalTablesConsideredForRepair(repairState.getTotalTablesConsideredForRepair() + 1); + TableMetadata tableMetadata = iter.next(); + String tableName = tableMetadata.name; + tablesToBeRepaired.add(tableName); + + // See if we should repair MVs as well that are associated with this given table + List mvs = 
AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); + if (!mvs.isEmpty()) + { + tablesToBeRepaired.addAll(mvs); + repairState.setTotalMVTablesConsideredForRepair(repairState.getTotalMVTablesConsideredForRepair() + mvs.size()); + } + } + + for (String tableName : tablesToBeRepaired) + { + try + { + ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); + if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) + { + logger.info("Repair is disabled for keyspace {} for tables: {}", keyspaceName, tableName); + repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); + continue; + } + // this is done to make autorepair safe as running repair on table with more sstables + // may have its own challenges + int size = columnFamilyStore.getLiveSSTables().size(); + if (size > config.getRepairSSTableCountHigherThreshold(repairType)) + { + logger.info("Too many SSTables for repair, not doing repair on table {}.{} " + + "totalSSTables {}", keyspaceName, tableName, columnFamilyStore.getLiveSSTables().size()); + repairState.setRepairSkippedTablesCount(repairState.getRepairSkippedTablesCount() + 1); + continue; + } + + if (config.getRepairByKeyspace(repairType)) + { + logger.info("Repair keyspace {} for tables: {}", keyspaceName, tablesToBeRepaired); + } + else + { + logger.info("Repair table {}.{}", keyspaceName, tableName); + } + long tableStartTime = timeFunc.get(); + boolean repairSuccess = true; + Set> ranges = new HashSet<>(); + List> subRangesToBeRepaired = tokenRangeSplitters.get(repairType).getRange(repairType, primaryRangeOnly, keyspaceName, tableName); + int totalSubRanges = subRangesToBeRepaired.size(); + int totalProcessedSubRanges = 0; + for (Pair token : subRangesToBeRepaired) + { + if (!config.isAutoRepairEnabled(repairType)) + { + logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); + 
repairState.setRepairInProgress(false); + return; + } + + if (config.getRepairByKeyspace(repairType)) + { + if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, tableStartTime, tablesToBeRepaired.size())) + { + repairState.setRepairSkippedTablesCount(repairState.getRepairSkippedTablesCount() + tablesToBeRepaired.size()); + logger.info("Keyspace took too much time to repair hence skipping it {}", + keyspaceName); + break; + } + } + else + { + if (AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) + { + repairState.setRepairSkippedTablesCount(repairState.getRepairSkippedTablesCount() + 1); + logger.info("Table took too much time to repair hence skipping it {}.{}", + keyspaceName, tableName); + break; + } + } + Token childStartToken = token.left; + Token childEndToken = token.right; + logger.debug("Current Token Left side {}, right side {}", childStartToken + .toString(), childEndToken.toString()); + + ranges.add(new Range<>(childStartToken, childEndToken)); + totalProcessedSubRanges++; + if ((totalProcessedSubRanges % config.getRepairThreads(repairType) == 0) || + (totalProcessedSubRanges == totalSubRanges)) + { + RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, + config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : ImmutableList.of(tableName), + ranges, primaryRangeOnly); + repairState.resetWaitCondition(); + new Thread(NamedThreadFactory.createAnonymousThread(new FutureTask<>(task, null))).start(); + try + { + repairState.waitForRepairToComplete(); + } + catch (InterruptedException e) + { + logger.error("Exception in cond await:", e); + } + + //check repair status + if (repairState.isSuccess()) + { + logger.info("Repair completed for range {}-{} for {}.{}, total subranges: {}," + + "processed subranges: {}", childStartToken, childEndToken, + keyspaceName, config.getRepairByKeyspace(repairType) ? 
tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges); + } + else + { + repairSuccess = false; + //in future we can add retry, etc. + logger.info("Repair failed for range {}-{} for {}.{} total subranges: {}," + + "processed subranges: {}", childStartToken, childEndToken, + keyspaceName, config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges); + } + ranges.clear(); + } + } + int touchedTables = config.getRepairByKeyspace(repairType) ? tablesToBeRepaired.size() : 1; + if (repairSuccess) + { + repairState.setRepairTableSuccessCount(repairState.getRepairTableSuccessCount() + touchedTables); + } + else + { + repairState.setRepairFailedTablesCount(repairState.getRepairFailedTablesCount() + touchedTables); + } + if (config.getRepairByKeyspace(repairType)) + { + logger.info("Repair completed for keyspace {}, tables: {}", keyspaceName, tablesToBeRepaired); + break; + } + else + { + logger.info("Repair completed for {}.{}", keyspaceName, tableName); + } + } + catch (Exception e) + { + logger.error("Exception while repairing keyspace {}:", keyspaceName, e); + } + } + } + + //if it was due to priority then remove it now + if (turn == MY_TURN_DUE_TO_PRIORITY) + { + logger.info("Remove current host from priority list"); + AutoRepairUtils.removePriorityStatus(repairType, myId); + } + + repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); + long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); + logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + + "repairTableSuccessCount {}, repairTableFailureCount {}, " + + "repairTableSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), + repairState.getRepairTableSuccessCount(), repairState.getRepairFailedTablesCount(), + repairState.getRepairSkippedTablesCount()); + if (repairState.getLastRepairTime() != 0) + { + 
repairState.setClusterRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - + repairState.getLastRepairTime())); + logger.info("Cluster repair time for repair type {}: {} day(s)", repairType, + TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec())); + } + repairState.setLastRepairTime(timeFunc.get()); + if (timeInHours == 0 && millisToWait > 0) + { + //If repair finished quickly, happens for an empty instance, in such case + //wait for a minute so that the JMX metrics can detect the repairInProgress + logger.info("Wait for {} milliseconds for repair type {}.", millisToWait, repairType); + Thread.sleep(millisToWait); + } + repairState.setRepairInProgress(false); + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); + } + else + { + logger.info("Waiting for my turn..."); + } + } + catch (Exception e) + { + logger.error("Exception in autorepair:", e); + } + } + + public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) + { + return repairStates.get(repairType); + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java new file mode 100644 index 000000000000..bd20c1d3c548 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.exceptions.ConfigurationException; + +public class AutoRepairConfig implements Serializable +{ + // enable/disable auto repair globally, overrides all other settings. Cannot be modified dynamically. + public final Boolean enabled; + // the interval in seconds between checks for eligible repair operations. Cannot be modified dynamically. 
+ public final DurationSpec.IntSecondsBound repair_check_interval = new DurationSpec.IntSecondsBound("5m"); + // configures how long repair history is kept for a replaced node + public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); + // global_settings overides Options.defaultOptions for all repair types + public volatile Options global_settings; + + public enum RepairType + { + full, + incremental; + + public static AutoRepairState getAutoRepairState(RepairType repairType) + { + switch (repairType) + { + case full: + return new FullRepairState(); + case incremental: + return new IncrementalRepairState(); + } + + throw new IllegalArgumentException("Invalid repair type: " + repairType); + } + } + + // repair_type_overrides overrides the global_settings for a specific repair type + public volatile Map repair_type_overrides = new EnumMap<>(RepairType.class); + + public AutoRepairConfig() + { + this(false); + } + + public AutoRepairConfig(boolean enabled) + { + this.enabled = enabled; + global_settings = Options.getDefaultOptions(); + for (RepairType type : RepairType.values()) + { + repair_type_overrides.put(type, new Options()); + } + } + + public DurationSpec.IntSecondsBound getRepairCheckInterval() + { + return repair_check_interval; + } + + public boolean isAutoRepairSchedulingEnabled() + { + return enabled; + } + + public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferInterval() + { + return history_clear_delete_hosts_buffer_interval; + } + + public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) + { + history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); + } + + public boolean isAutoRepairEnabled(RepairType repairType) + { + return enabled && applyOverrides(repairType, opt -> opt.enabled); + } + + public void setAutoRepairEnabled(RepairType repairType, boolean enabled) + { + if (enabled && repairType == 
RepairType.incremental && + (DatabaseDescriptor.getMaterializedViewsEnabled() || DatabaseDescriptor.isCDCEnabled())) + throw new ConfigurationException("Cannot enable incremental repair with materialized views or CDC enabled"); + + ensureOverrides(repairType); + repair_type_overrides.get(repairType).enabled = enabled; + } + + public void setRepairByKeyspace(RepairType repairType, boolean repairByKeyspace) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).repair_by_keyspace = repairByKeyspace; + } + + public boolean getRepairByKeyspace(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_by_keyspace); + } + + public int getRepairThreads(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.number_of_repair_threads); + } + + public void setRepairThreads(RepairType repairType, int repairThreads) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).number_of_repair_threads = repairThreads; + } + + public int getRepairSubRangeNum(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.number_of_subranges); + } + + public void setRepairSubRangeNum(RepairType repairType, int repairSubRanges) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).number_of_subranges = repairSubRanges; + } + + public DurationSpec.IntSecondsBound getRepairMinInterval(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.min_repair_interval); + } + + public void setRepairMinInterval(RepairType repairType, String minRepairInterval) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).min_repair_interval = new DurationSpec.IntSecondsBound(minRepairInterval); + } + + public int getRepairSSTableCountHigherThreshold(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.sstable_upper_threshold); + } + + public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) + { + 
ensureOverrides(repairType); + repair_type_overrides.get(repairType).sstable_upper_threshold = sstableHigherThreshold; + } + + public DurationSpec.IntSecondsBound getAutoRepairTableMaxRepairTime(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.table_max_repair_time); + } + + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(autoRepairTableMaxRepairTime); + } + + public Set getIgnoreDCs(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.ignore_dcs); + } + + public void setIgnoreDCs(RepairType repairType, Set ignoreDCs) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).ignore_dcs = ignoreDCs; + } + + public boolean getRepairPrimaryTokenRangeOnly(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_primary_token_range_only); + } + + public void setRepairPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).repair_primary_token_range_only = primaryTokenRangeOnly; + } + + public int getParallelRepairPercentageInGroup(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.parallel_repair_percentage_in_group); + } + + public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).parallel_repair_percentage_in_group = percentageInGroup; + } + + public int getParallelRepairCountInGroup(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.parallel_repair_count_in_group); + } + + public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup) + { + ensureOverrides(repairType); + 
repair_type_overrides.get(repairType).parallel_repair_count_in_group = countInGroup; + } + + public boolean getMVRepairEnabled(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.mv_repair_enabled); + } + + public void setMVRepairEnabled(RepairType repairType, boolean enabled) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).mv_repair_enabled = enabled; + } + + public void setForceRepairNewNode(RepairType repairType, boolean forceRepairNewNode) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).force_repair_new_node = forceRepairNewNode; + } + + public boolean getForceRepairNewNode(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.force_repair_new_node); + } + + public String getTokenRangeSplitter(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.token_range_splitter); + } + + // Options configures auto-repair behavior for a given repair type. + // All fields can be modified dynamically. 
+ public static class Options implements Serializable + { + // The separator separating different DCs in repair_dc_groups + public static final String DC_GROUP_SEPARATOR = "\\|"; + + // defaultOptions defines the default auto-repair behavior when no overrides are defined + @VisibleForTesting + protected static final Options defaultOptions = getDefaultOptions(); + + public Options() + { + } + + @VisibleForTesting + protected static Options getDefaultOptions() + { + Options opts = new Options(); + + opts.enabled = false; + opts.repair_by_keyspace = false; + opts.number_of_subranges = 1; + opts.number_of_repair_threads = 1; + opts.parallel_repair_count_in_group = 1; + opts.parallel_repair_percentage_in_group = 0; + opts.sstable_upper_threshold = 10000; + opts.min_repair_interval = new DurationSpec.IntSecondsBound("24h"); + opts.ignore_dcs = new HashSet<>(); + opts.repair_primary_token_range_only = true; + opts.force_repair_new_node = false; + opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); + opts.mv_repair_enabled = true; + opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); + + return opts; + } + + // enable/disable auto repair for the given repair type + public volatile Boolean enabled; + // auto repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. + public volatile Boolean repair_by_keyspace; + // the number of subranges to split each to-be-repaired token range into, + // the higher this number, the smaller the repair sessions will be + // How many subranges to divide one range into? The default is 1. + // If you are using v-node, say 256, then the repair will always go one v-node range at a time, this parameter, additionally, will let us further subdivide a given v-node range into sub-ranges. + // With the value “1” and v-nodes of 256, a given table on a node will undergo the repair 256 times. 
But with a value “2,” the same table on a node will undergo a repair 512 times because every v-node range will be further divided by two. + // If you do not use v-nodes or the number of v-nodes is pretty small, say 8, setting this value to a higher number, say 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. + public volatile Integer number_of_subranges; + // the number of repair threads to run for a given invoked Repair Job. + // Once the scheduler schedules one Job, then howmany threads to use inside that job will be controlled through this parameter. + // This is similar to -j for repair options for the nodetool repair command. + public volatile Integer number_of_repair_threads; + // the number of repair sessions that can run in parallel in a single group + // The number of nodes running repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. The default is 3. + // This configuration controls how many nodes would run repair in parallel. + // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. + // If one or more node(s) finish repair, then the framework automatically picks up the next candidate and ensures the maximum number of nodes running repair do not exceed “3”. + public volatile Integer parallel_repair_count_in_group; + // the number of repair sessions that can run in parallel in a single groupas a percentage + // of the total number of nodes in the group [0,100] + // The percentage of nodes in the cluster that run repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. + //The problem with a fixed number of nodes (the above property) is that in a large-scale environment, + // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. 
+ //The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. + // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. + // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers + public volatile Integer parallel_repair_percentage_in_group; + // the upper threshold of SSTables allowed to participate in a single repair session + // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. + // This is to avoid penalizing good neighbors with an outlier. + public volatile Integer sstable_upper_threshold; + // the minimum time in hours between repairs of the same token range + // The minimum number of hours to run one repair cycle is 24 hours. The default is 24 hours. + // This means that if auto repair finishes one round on one cluster within 24 hours, it won’t start a new round. + // This is applicable for extremely tiny clusters, say 3 nodes. + public volatile DurationSpec.IntSecondsBound min_repair_interval; + // specifies a denylist of datacenters to repair + // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. + public volatile Set ignore_dcs; + // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' + // It is the same as -pr in nodetool repair options. 
+ public volatile Boolean repair_primary_token_range_only; + // configures whether to force immediate repair on new nodes + public volatile Boolean force_repair_new_node; + // the maximum time in seconds that a repair session can run for a single table + // Max time for repairing one table, if exceeded, skip the table. The default is 6 * 60 * 60, which is 6 hours. + // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. + // Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. + // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator telling them they are violating SLA even if I am a neighbor, and it would require a lot of back-and-forth manual interventions, etc. + // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. + // Please note the repair will still run in parallel on other nodes, this is to address outliers on a given node. + public volatile DurationSpec.IntSecondsBound table_max_repair_time; + // the default is 'true'. MVs are mutated at LOCAL_ONE consistency level in Cassandra. + // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. + public volatile Boolean mv_repair_enabled; + // the default is DefaultAutoRepairTokenSplitter.class.getName(). The class should implement IAutoRepairTokenRangeSplitter. 
+ // The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' + public volatile String token_range_splitter; + + public String toString() + { + return "Options{" + + "enabled=" + enabled + + ", repair_by_keyspace=" + repair_by_keyspace + + ", number_of_subranges=" + number_of_subranges + + ", number_of_repair_threads=" + number_of_repair_threads + + ", parallel_repair_count_in_group=" + parallel_repair_count_in_group + + ", parallel_repair_percentage_in_group=" + parallel_repair_percentage_in_group + + ", sstable_upper_threshold=" + sstable_upper_threshold + + ", min_repair_interval=" + min_repair_interval + + ", ignore_dcs=" + ignore_dcs + + ", repair_primary_token_range_only=" + repair_primary_token_range_only + + ", force_repair_new_node=" + force_repair_new_node + + ", table_max_repair_time=" + table_max_repair_time + + ", mv_repair_enabled=" + mv_repair_enabled + + ", token_range_splitter=" + token_range_splitter + + '}'; + } + } + + @VisibleForTesting + protected T applyOverrides(RepairType repairType, Function optionSupplier) + { + ArrayList optsProviders = new ArrayList<>(); + if (repair_type_overrides != null) + { + optsProviders.add(repair_type_overrides.get(repairType)); + } + optsProviders.add(global_settings); + optsProviders.add(Options.defaultOptions); + + return optsProviders.stream() + .map(opt -> Optional.ofNullable(opt).map(optionSupplier).orElse(null)) + .filter(Objects::nonNull) + .findFirst() + .orElse(null); + } + + protected void ensureOverrides(RepairType repairType) + { + if (repair_type_overrides == null) + { + repair_type_overrides = new EnumMap<>(RepairType.class); + } + + if (repair_type_overrides.get(repairType) == null) + { + repair_type_overrides.put(repairType, new Options()); + } + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java new file 
mode 100644 index 000000000000..14cba17f6095 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Tables; + +public class AutoRepairKeyspace +{ + private AutoRepairKeyspace() + { + } + + /** + * Generation is used as a timestamp for automatic table creation on startup. + * If you make any changes to the tables below, make sure to increment the + * generation and document your change here. 
+ */ + public static final long GENERATION = 0; + + public static final String AUTO_REPAIR_HISTORY = "auto_repair_history"; + + public static final String AUTO_REPAIR_PRIORITY = "auto_repair_priority"; + + private static final TableMetadata AutoRepairHistory = + parse(AUTO_REPAIR_HISTORY, + "Auto repair history for each node", + "CREATE TABLE %s (" + + "host_id uuid," + + "repair_type text," + + "repair_turn text," + + "repair_start_ts timestamp," + + "repair_finish_ts timestamp," + + "delete_hosts set," + + "delete_hosts_update_time timestamp," + + "force_repair boolean," + + "PRIMARY KEY (repair_type, host_id))"); + + private static final TableMetadata AutoRepairPriority = + parse(AUTO_REPAIR_PRIORITY, + "Auto repair priority for each group", + "CREATE TABLE %s (" + + "repair_type text," + + "repair_priority set," + + "PRIMARY KEY (repair_type))"); + + private static TableMetadata parse(String name, String description, String schema) + { + return CreateTableStatement.parse(String.format(schema, name), SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) + .id(TableId.forSystemTable(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, name)) + .comment(description) + .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)) + .build(); + } + + public static KeyspaceMetadata metadata() + { + Tables tables = Tables.of(AutoRepairHistory, AutoRepairPriority); + return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(1), tables); + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java new file mode 100644 index 000000000000..cb3282c6cc60 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.view.TableViews; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.messages.RepairOption; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.concurrent.Condition; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.SimpleDateFormat; +import 
java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; + +// AutoRepairState represents the state of automated repair for a given repair type. +public abstract class AutoRepairState implements ProgressListener +{ + protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); + private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); + @VisibleForTesting + protected static Supplier timeFunc = System::currentTimeMillis; + + @VisibleForTesting + protected final RepairType repairType; + @VisibleForTesting + protected int totalTablesConsideredForRepair = 0; + @VisibleForTesting + protected long lastRepairTimeInMs; + @VisibleForTesting + protected int nodeRepairTimeInSec = 0; + @VisibleForTesting + protected int clusterRepairTimeInSec = 0; + @VisibleForTesting + protected boolean repairInProgress = false; + @VisibleForTesting + protected int repairKeyspaceCount = 0; + @VisibleForTesting + protected int repairTableSuccessCount = 0; + @VisibleForTesting + protected int repairTableSkipCount = 0; + @VisibleForTesting + protected int repairTableFailureCount = 0; + @VisibleForTesting + protected int totalMVTablesConsideredForRepair = 0; + + @VisibleForTesting + protected int totalDisabledTablesRepairCount = 0; + @VisibleForTesting + protected AutoRepairHistory longestUnrepairedNode; + @VisibleForTesting + protected Condition condition = newOneTimeCondition(); + @VisibleForTesting + protected boolean success = true; + protected final AutoRepairMetrics metrics; + + protected AutoRepairState(RepairType repairType) + { + metrics = AutoRepairMetricsManager.getMetrics(repairType); + this.repairType = repairType; + } + + public abstract RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly); + + 
protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption options) + { + RepairCoordinator task = new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), + options, keyspace); + + task.addProgressListener(this); + + return task; + } + + @Override + public void progress(String tag, ProgressEvent event) + { + ProgressEventType type = event.getType(); + String message = String.format("[%s] %s", format.format(System.currentTimeMillis()), event.getMessage()); + if (type == ProgressEventType.ERROR) + { + logger.error("Repair failure for {} repair: {}", repairType.toString(), message); + success = false; + condition.signalAll(); + } + if (type == ProgressEventType.PROGRESS) + { + message = message + " (progress: " + (int) event.getProgressPercentage() + "%)"; + logger.debug("Repair progress for {} repair: {}", repairType.toString(), message); + } + if (type == ProgressEventType.COMPLETE) + { + success = true; + condition.signalAll(); + } + } + + public void waitForRepairToComplete() throws InterruptedException + { + //if for some reason we don't hear back on repair progress for sometime + if (!condition.await(12, TimeUnit.HOURS)) + { + success = false; + } + } + + public long getLastRepairTime() + { + return lastRepairTimeInMs; + } + + public void setTotalTablesConsideredForRepair(int count) + { + totalTablesConsideredForRepair = count; + } + + public int getTotalTablesConsideredForRepair() + { + return totalTablesConsideredForRepair; + } + + public void setLastRepairTime(long lastRepairTime) + { + lastRepairTimeInMs = lastRepairTime; + } + + public int getClusterRepairTimeInSec() + { + return clusterRepairTimeInSec; + } + + public int getNodeRepairTimeInSec() + { + return nodeRepairTimeInSec; + } + + public void setRepairInProgress(boolean repairInProgress) + { + this.repairInProgress = repairInProgress; + } + + public boolean isRepairInProgress() + { + return repairInProgress; + } + + public void 
setRepairSkippedTablesCount(int count) + { + repairTableSkipCount = count; + } + + public int getRepairSkippedTablesCount() + { + return repairTableSkipCount; + } + + public int getLongestUnrepairedSec() + { + if (longestUnrepairedNode == null) + { + return 0; + } + return (int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - longestUnrepairedNode.getLastRepairFinishTime()); + } + + public void setRepairFailedTablesCount(int count) + { + repairTableFailureCount = count; + } + + public int getRepairFailedTablesCount() + { + return repairTableFailureCount; + } + + public void setTotalMVTablesConsideredForRepair(int count) + { + totalMVTablesConsideredForRepair = count; + } + + public int getTotalMVTablesConsideredForRepair() + { + return totalMVTablesConsideredForRepair; + } + + public void setNodeRepairTimeInSec(int elapsed) + { + nodeRepairTimeInSec = elapsed; + } + + public void setClusterRepairTimeInSec(int seconds) + { + clusterRepairTimeInSec = seconds; + } + + public void setRepairKeyspaceCount(int count) + { + repairKeyspaceCount = count; + } + + public void setRepairTableSuccessCount(int count) + { + repairTableSuccessCount = count; + } + + public int getRepairKeyspaceCount() + { + return repairKeyspaceCount; + } + + public int getRepairTableSuccessCount() + { + return repairTableSuccessCount; + } + + public void setLongestUnrepairedNode(AutoRepairHistory longestUnrepairedNode) + { + this.longestUnrepairedNode = longestUnrepairedNode; + } + + public boolean isSuccess() + { + return success; + } + + public void recordTurn(AutoRepairUtils.RepairTurn turn) + { + metrics.recordTurn(turn); + } + + public void setTotalDisabledTablesRepairCount(int count) + { + totalDisabledTablesRepairCount = count; + } + + public int getTotalDisabledTablesRepairCount() + { + return totalDisabledTablesRepairCount; + } + + public void resetWaitCondition() + { + condition = newOneTimeCondition(); + } +} + +class IncrementalRepairState extends AutoRepairState +{ + public 
IncrementalRepairState() + { + super(RepairType.incremental); + } + + @Override + public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + { + RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, true, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); + + option.getColumnFamilies().addAll(filterOutUnsafeTables(keyspace, tables)); + + return getRepairRunnable(keyspace, option); + } + + @VisibleForTesting + protected List filterOutUnsafeTables(String keyspaceName, List tables) + { + Keyspace keyspace = Keyspace.open(keyspaceName); + + return tables.stream() + .filter(table -> { + ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(table); + TableViews views = keyspace.viewManager.forTable(cfs.metadata()); + if (views != null && !views.isEmpty()) + { + logger.debug("Skipping incremental repair for {}.{} as it has materialized views", keyspaceName, table); + return false; + } + + if (cfs.metadata().params != null && cfs.metadata().params.cdc) + { + logger.debug("Skipping incremental repair for {}.{} as it has CDC enabled", keyspaceName, table); + return false; + } + + return true; + }).collect(Collectors.toList()); + } +} + +class FullRepairState extends AutoRepairState +{ + public FullRepairState() + { + super(RepairType.full); + } + + @Override + public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + { + RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); + + option.getColumnFamilies().addAll(tables); + + return getRepairRunnable(keyspace, option); + } +} diff --git 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java new file mode 100644 index 000000000000..ea052acf1354 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -0,0 +1,812 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.collect.Lists; + +import org.apache.cassandra.locator.LocalStrategy; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.cql3.statements.ModificationStatement; +import org.apache.cassandra.cql3.statements.SelectStatement; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UUIDType; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.locator.AbstractReplicationStrategy; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.NetworkTopologyStrategy; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.ViewMetadata; +import org.apache.cassandra.serializers.SetSerializer; +import org.apache.cassandra.serializers.UUIDSerializer; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; +import 
org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.transport.ProtocolVersion; +import org.apache.cassandra.transport.messages.ResultMessage; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; + +/** + * This class serves as a utility class for AutoRepair. It contains various helper APIs + * to store/retrieve repair status, decide whose turn is next, etc. + */ +public class AutoRepairUtils +{ + private static final Logger logger = LoggerFactory.getLogger(AutoRepairUtils.class); + static final String COL_REPAIR_TYPE = "repair_type"; + static final String COL_HOST_ID = "host_id"; + static final String COL_REPAIR_START_TS = "repair_start_ts"; + static final String COL_REPAIR_FINISH_TS = "repair_finish_ts"; + static final String COL_REPAIR_PRIORITY = "repair_priority"; + static final String COL_DELETE_HOSTS = "delete_hosts"; // this set stores the host ids which think the row should be deleted + static final String COL_REPAIR_TURN = "repair_turn"; // this record the last repair turn. 
Normal turn or turn due to priority + static final String COL_DELETE_HOSTS_UPDATE_TIME = "delete_hosts_update_time"; // the time when delete hosts are upated + static final String COL_FORCE_REPAIR = "force_repair"; // if set to true, the node will do non-primary range rapair + + final static String SELECT_REPAIR_HISTORY = String.format( + "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE); + final static String SELECT_REPAIR_PRIORITY = String.format( + "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + final static String DEL_REPAIR_PRIORITY = String.format( + "DELETE %s[?] FROM %s.%s WHERE %s = ?", COL_REPAIR_PRIORITY, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + final static String ADD_PRIORITY_HOST = String.format( + "UPDATE %s.%s SET %s = %s + ? WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); + + final static String INSERT_NEW_REPAIR_HISTORY = String.format( + "INSERT INTO %s.%s (%s, %s, %s, %s, %s, %s) values (?, ? ,?, ?, {}, ?) IF NOT EXISTS", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + COL_HOST_ID, COL_REPAIR_START_TS, COL_REPAIR_FINISH_TS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME); + + final static String ADD_HOST_ID_TO_DELETE_HOSTS = String.format( + "UPDATE %s.%s SET %s = %s + ?, %s = ? WHERE %s = ? AND %s = ? IF EXISTS" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME, COL_REPAIR_TYPE, COL_HOST_ID); + + final static String DEL_AUTO_REPAIR_HISTORY = String.format( + "DELETE FROM %s.%s WHERE %s = ? AND %s = ?" 
+ , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + COL_HOST_ID); + + final static String RECORD_START_REPAIR_HISTORY = String.format( + "UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS, + COL_REPAIR_TYPE, COL_HOST_ID); + + final static String RECORD_FINISH_REPAIR_HISTORY = String.format( + + "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, + COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); + + final static String CLEAR_DELETE_HOSTS = String.format( + "UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + COL_REPAIR_TYPE, COL_HOST_ID); + + final static String SET_FORCE_REPAIR = String.format( + "UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ?" 
+ , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, + COL_REPAIR_TYPE, COL_HOST_ID); + + static ModificationStatement delStatementRepairHistory; + static SelectStatement selectStatementRepairHistory; + static ModificationStatement delStatementPriorityStatus; + static SelectStatement selectStatementRepairPriority; + static ModificationStatement addPriorityHost; + static ModificationStatement insertNewRepairHistoryStatement; + static ModificationStatement recordStartRepairHistoryStatement; + static ModificationStatement recordFinishRepairHistoryStatement; + static ModificationStatement addHostIDToDeleteHostsStatement; + static ModificationStatement clearDeleteHostsStatement; + static ModificationStatement setForceRepairStatement; + static ConsistencyLevel internalQueryCL; + + public enum RepairTurn + { + MY_TURN, + NOT_MY_TURN, + MY_TURN_DUE_TO_PRIORITY, + MY_TURN_FORCE_REPAIR + } + + public static void setup() + { + selectStatementRepairHistory = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_HISTORY, ClientState + .forInternalCalls()); + selectStatementRepairPriority = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_PRIORITY, ClientState + .forInternalCalls()); + delStatementPriorityStatus = (ModificationStatement) QueryProcessor.getStatement(DEL_REPAIR_PRIORITY, ClientState + .forInternalCalls()); + addPriorityHost = (ModificationStatement) QueryProcessor.getStatement(ADD_PRIORITY_HOST, ClientState + .forInternalCalls()); + insertNewRepairHistoryStatement = (ModificationStatement) QueryProcessor.getStatement(INSERT_NEW_REPAIR_HISTORY, ClientState + .forInternalCalls()); + recordStartRepairHistoryStatement = (ModificationStatement) QueryProcessor.getStatement(RECORD_START_REPAIR_HISTORY, ClientState + .forInternalCalls()); + recordFinishRepairHistoryStatement = (ModificationStatement) QueryProcessor.getStatement(RECORD_FINISH_REPAIR_HISTORY, ClientState + .forInternalCalls()); + 
addHostIDToDeleteHostsStatement = (ModificationStatement) QueryProcessor.getStatement(ADD_HOST_ID_TO_DELETE_HOSTS, ClientState + .forInternalCalls()); + setForceRepairStatement = (ModificationStatement) QueryProcessor.getStatement(SET_FORCE_REPAIR, ClientState + .forInternalCalls()); + clearDeleteHostsStatement = (ModificationStatement) QueryProcessor.getStatement(CLEAR_DELETE_HOSTS, ClientState + .forInternalCalls()); + delStatementRepairHistory = (ModificationStatement) QueryProcessor.getStatement(DEL_AUTO_REPAIR_HISTORY, ClientState + .forInternalCalls()); + Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + internalQueryCL = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.ONE; + } + + public static class AutoRepairHistory + { + UUID hostId; + String repairTurn; + long lastRepairStartTime; + long lastRepairFinishTime; + Set deleteHosts; + long deleteHostsUpdateTime; + boolean forceRepair; + + public AutoRepairHistory(UUID hostId, String repairTurn, long lastRepairStartTime, long lastRepairFinishTime, + Set deleteHosts, long deleteHostsUpateTime, boolean forceRepair) + { + this.hostId = hostId; + this.repairTurn = repairTurn; + this.lastRepairStartTime = lastRepairStartTime; + this.lastRepairFinishTime = lastRepairFinishTime; + this.deleteHosts = deleteHosts; + if (this.deleteHosts == null) + { + this.deleteHosts = new HashSet<>(); + } + this.deleteHostsUpdateTime = deleteHostsUpateTime; + this.forceRepair = forceRepair; + } + + public String toString() + { + return MoreObjects.toStringHelper(this). + add("hostId", hostId). + add("repairTurn", repairTurn). + add("lastRepairStartTime", lastRepairStartTime). + add("lastRepairFinishTime", lastRepairFinishTime). + add("deleteHosts", deleteHosts). 
+ toString(); + } + + public boolean isRepairRunning() + { + // if a repair history record has start time laster than finish time, it means the repair is running + return lastRepairStartTime > lastRepairFinishTime; + } + + public long getLastRepairFinishTime() + { + return lastRepairFinishTime; + } + } + + public static class CurrentRepairStatus + { + public Set hostIdsWithOnGoingRepair; // hosts that is running repair + public Set hostIdsWithOnGoingForceRepair; // hosts that is running repair because of force repair + Set priority; + List historiesWithoutOnGoingRepair; // hosts that is NOT running repair + + public CurrentRepairStatus(List repairHistories, Set priority) + { + hostIdsWithOnGoingRepair = new HashSet<>(); + hostIdsWithOnGoingForceRepair = new HashSet<>(); + historiesWithoutOnGoingRepair = new ArrayList<>(); + + for (AutoRepairHistory history : repairHistories) + { + if (history.isRepairRunning()) + { + if (history.forceRepair) + { + hostIdsWithOnGoingForceRepair.add(history.hostId); + } + else + { + hostIdsWithOnGoingRepair.add(history.hostId); + } + } + else + { + historiesWithoutOnGoingRepair.add(history); + } + } + this.priority = priority; + } + + public String toString() + { + return MoreObjects.toStringHelper(this). + add("hostIdsWithOnGoingRepair", hostIdsWithOnGoingRepair). + add("hostIdsWithOnGoingForceRepair", hostIdsWithOnGoingForceRepair). + add("historiesWithoutOnGoingRepair", historiesWithoutOnGoingRepair). + add("priority", priority). 
+ toString(); + } + } + + @VisibleForTesting + public static List getAutoRepairHistoryByGroupID(RepairType repairType) + { + UntypedResultSet repairHistoryResult; + + ResultMessage.Rows repairStatusRows = selectStatementRepairHistory.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()))), Dispatcher.RequestTime.forImmediateExecution()); + repairHistoryResult = UntypedResultSet.create(repairStatusRows.result); + + List repairHistories = new ArrayList<>(); + if (repairHistoryResult.size() > 0) + { + for (UntypedResultSet.Row row : repairHistoryResult) + { + UUID hostId = row.getUUID(COL_HOST_ID); + String repairTurn = null; + if (row.has(COL_REPAIR_TURN)) + repairTurn = row.getString(COL_REPAIR_TURN); + long lastRepairStartTime = row.getLong(COL_REPAIR_START_TS, 0); + long lastRepairFinishTime = row.getLong(COL_REPAIR_FINISH_TS, 0); + Set deleteHosts = row.getSet(COL_DELETE_HOSTS, UUIDType.instance); + long deleteHostsUpdateTime = row.getLong(COL_DELETE_HOSTS_UPDATE_TIME, 0); + Boolean forceRepair = row.has(COL_FORCE_REPAIR) ? row.getBoolean(COL_FORCE_REPAIR) : false; + repairHistories.add(new AutoRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, + deleteHosts, deleteHostsUpdateTime, forceRepair)); + } + return repairHistories; + } + logger.info("No repair history found"); + return null; + } + + public static List getAutoRepairHistoryForLocalGroup(RepairType repairType) + { + return getAutoRepairHistoryByGroupID(repairType); + } + + // A host may add itself in delete hosts for some other hosts due to restart or some temp gossip issue. 
If a node's record + // delete_hosts is not growing for more than 2 hours, we consider it as a normal node so we clear the delete_hosts for that node + public static void clearDeleteHosts(RepairType repairType, UUID hostId) + { + clearDeleteHostsStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); + } + + public static void setForceRepairNewNode(RepairType repairType) + { + // this function will be called when a node bootstrap finished + UUID hostId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + // insert the data first + insertNewRepairHistory(repairType, System.currentTimeMillis(), System.currentTimeMillis()); + setForceRepair(repairType, hostId); + } + + public static void setForceRepair(RepairType repairType, Set hosts) + { + // this function is used by nodetool + for (InetAddressAndPort host : hosts) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(host); + setForceRepair(repairType, hostId); + } + } + + public static void setForceRepair(RepairType repairType, UUID hostId) + { + setForceRepairStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), + Dispatcher.RequestTime.forImmediateExecution()); + + logger.info("Set force repair repair type: {}, node: {}", repairType, hostId); + } + + public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) + { + List autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + return getCurrentRepairStatus(repairType, autoRepairHistories); + } + + public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories) + { + if (autoRepairHistories != null) + { + 
CurrentRepairStatus status = new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType)); + + return status; + } + return null; + } + + @VisibleForTesting + protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Collection allNodesInRing) + { + TreeSet hostIdsInCurrentRing = new TreeSet<>(); + for (NodeAddresses node : allNodesInRing) + { + String nodeDC = DatabaseDescriptor.getLocator().location(node.broadcastAddress).datacenter; + if (AutoRepairService.instance.getAutoRepairConfig().getIgnoreDCs(repairType).contains(nodeDC)) + { + logger.info("Ignore node {} because its datacenter is {}", node, nodeDC); + continue; + } + /** Check if endpoint state exists in gossip or not. If it + * does not then this maybe a ghost node so ignore it + */ + if (Gossiper.instance.isAlive(node.broadcastAddress)) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(node.broadcastAddress); + hostIdsInCurrentRing.add(hostId); + } + else + { + logger.info("Node is not present in Gossipe cache node {}, node data center {}", node, nodeDC); + } + } + return hostIdsInCurrentRing; + } + + public static TreeSet getHostIdsInCurrentRing(RepairType repairType) + { + Collection allNodesInRing = ClusterMetadata.current().directory.addresses.values(); + return getHostIdsInCurrentRing(repairType, allNodesInRing); + } + + // This function will return the host ID for the node which has not been repaired for longest time + public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) + { + List autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + return getHostWithLongestUnrepairTime(autoRepairHistories); + } + + private static AutoRepairHistory getHostWithLongestUnrepairTime(List autoRepairHistories) + { + if (autoRepairHistories == null) + { + return null; + } + AutoRepairHistory rst = null; + long oldestTimestamp = Long.MAX_VALUE; + for (AutoRepairHistory autoRepairHistory : autoRepairHistories) + { + if 
(autoRepairHistory.lastRepairFinishTime < oldestTimestamp) + { + rst = autoRepairHistory; + oldestTimestamp = autoRepairHistory.lastRepairFinishTime; + } + } + return rst; + } + + public static int getMaxNumberOfNodeRunAutoRepairInGroup(RepairType repairType, int groupSize) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (groupSize == 0) + { + return Math.max(config.getParallelRepairCountInGroup(repairType), 1); + } + // we will use the max number from config between auto_repair_parallel_repair_count_in_group and auto_repair_parallel_repair_percentage_in_group + int value = Math.max(groupSize * config.getParallelRepairPercentageInGroup(repairType) / 100, + config.getParallelRepairCountInGroup(repairType)); + // make sure at least one node getting repaired + return Math.max(1, value); + } + + @VisibleForTesting + public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) + { + try + { + Collection allNodesInRing = ClusterMetadata.current().directory.addresses.values(); + logger.info("Total nodes in ring {}", allNodesInRing.size()); + TreeSet hostIdsInCurrentRing = getHostIdsInCurrentRing(repairType, allNodesInRing); + logger.info("Total nodes qualified for repair {}", hostIdsInCurrentRing.size()); + + List autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + Set autoRepairHistoryIds = new HashSet<>(); + + // 1. 
Remove any node that is not part of group based on goissip info + if (autoRepairHistories != null) + { + for (AutoRepairHistory nodeHistory : autoRepairHistories) + { + autoRepairHistoryIds.add(nodeHistory.hostId); + // clear delete_hosts if the node's delete hosts is not growing for more than two hours + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (nodeHistory.deleteHosts.size() > 0 + && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( + System.currentTimeMillis() - nodeHistory.deleteHostsUpdateTime + )) + { + clearDeleteHosts(repairType, nodeHistory.hostId); + logger.info("Delete hosts for {} for repair type {} has not been updated for more than {} seconds. Delete hosts has been cleared. Delete hosts before clear {}" + , nodeHistory.hostId, repairType, config.getAutoRepairHistoryClearDeleteHostsBufferInterval(), nodeHistory.deleteHosts); + } + else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) + { + if (nodeHistory.deleteHosts.size() > Math.max(2, hostIdsInCurrentRing.size() * 0.5)) + { + // More than half of the groups thinks the record should be deleted + logger.info("{} think {} is orphan node, will delete auto repair history for repair type {}.", nodeHistory.deleteHosts, nodeHistory.hostId, repairType); + deleteAutoRepairHistory(repairType, nodeHistory.hostId); + } + else + { + // I think this host should be deleted + logger.info("I({}) think {} is not part of ring, vote to delete it for repair type {}.", myId, nodeHistory.hostId, repairType); + addHostIdToDeleteHosts(repairType, myId, nodeHistory.hostId); + } + } + } + } + + // 2. 
Add node to auto repair history table if a node is in gossip info + for (UUID hostId : hostIdsInCurrentRing) + { + if (!autoRepairHistoryIds.contains(hostId)) + { + logger.info("{} for repair type {} doesn't exist in the auto repair history table, insert a new record.", repairType, hostId); + insertNewRepairHistory(repairType, hostId, System.currentTimeMillis(), System.currentTimeMillis()); + } + } + + //get current repair status + CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); + if (currentRepairStatus != null) + { + logger.info("Latest repair status {}", currentRepairStatus); + //check if I am forced to run repair + for (AutoRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) + { + if (history.forceRepair && history.hostId.equals(myId)) + { + return MY_TURN_FORCE_REPAIR; + } + } + } + + int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepairInGroup(repairType, + autoRepairHistories == null ? 0 : autoRepairHistories.size()); + logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); + + if (currentRepairStatus == null || parallelRepairNumber > currentRepairStatus.hostIdsWithOnGoingRepair.size()) + { + // more repairs can be run, I might be the new one + + if (autoRepairHistories != null) + { + logger.info("Auto repair history table has {} records", autoRepairHistories.size()); + } + else + { + // try to fetch again + autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); + if (autoRepairHistories == null || currentRepairStatus == null) + { + logger.error("No record found"); + return NOT_MY_TURN; + } + } + + // get the longest unrepaired node from the nodes which are not running repair + AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + //check who is next, which is helpful for debugging + 
logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); + UUID priorityHostId = null; + if (currentRepairStatus.priority != null) + { + for (UUID priorityID : currentRepairStatus.priority) + { + // remove ids doesn't belong to this ring + if (!hostIdsInCurrentRing.contains(priorityID)) + { + logger.info("{} is not part of the current ring, will be removed from priority list.", priorityID); + removePriorityStatus(repairType, priorityID); + } + else + { + priorityHostId = priorityID; + break; + } + } + } + + if (priorityHostId != null && !myId.equals(priorityHostId)) + { + logger.info("Priority list is not empty and I'm not the first node in the list, not my turn." + + "First node in priority list is {}", ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(priorityHostId))); + return NOT_MY_TURN; + } + + if (myId.equals(priorityHostId)) + { + //I have a priority for repair hence its my turn now + return MY_TURN_DUE_TO_PRIORITY; + } + + if (defaultNodeToBeRepaired.hostId.equals(myId)) + return MY_TURN; + } + else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) + { + return MY_TURN_FORCE_REPAIR; + } + // for some reason I was not done with the repair hence resume (maybe node restart in-between, etc.) + return currentRepairStatus.hostIdsWithOnGoingRepair.contains(myId) ? 
MY_TURN : NOT_MY_TURN; + } + catch (Exception e) + { + logger.error("Exception while deciding node's turn:", e); + } + return NOT_MY_TURN; + } + + static void deleteAutoRepairHistory(RepairType repairType, UUID hostId) + { + //delete the given hostId from current local group + delStatementRepairHistory.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); + } + + static void updateStartAutoRepairHistory(RepairType repairType, UUID myId, long timestamp, RepairTurn turn) + { + recordStartRepairHistoryStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(timestamp), + ByteBufferUtil.bytes(turn.name()), + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(myId) + )), Dispatcher.RequestTime.forImmediateExecution()); + } + + static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long timestamp) + { + recordFinishRepairHistoryStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(timestamp), + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(myId) + )), Dispatcher.RequestTime.forImmediateExecution()); + // Do not remove beblow log, the log is used by dtest + logger.info("Auto repair finished for {}", myId); + } + + public static void insertNewRepairHistory(RepairType repairType, UUID hostId, long startTime, long finishTime) + { + try + { + Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + ConsistencyLevel cl = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? 
+ ConsistencyLevel.LOCAL_SERIAL : null; + + UntypedResultSet resultSet; + ResultMessage.Rows resultMessage = (ResultMessage.Rows) insertNewRepairHistoryStatement.execute( + QueryState.forInternalCalls(), QueryOptions.create(internalQueryCL, Lists.newArrayList( + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId), + ByteBufferUtil.bytes(startTime), + ByteBufferUtil.bytes(finishTime), + ByteBufferUtil.bytes(System.currentTimeMillis()) + ), false, -1, null, cl, ProtocolVersion.CURRENT, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME), + Dispatcher.RequestTime.forImmediateExecution()); + resultSet = UntypedResultSet.create(resultMessage.result); + boolean applied = resultSet.one().getBoolean(ModificationStatement.CAS_RESULT_COLUMN.toString()); + if (applied) + { + logger.info("Successfully inserted a new auto repair history record for host id: {}", hostId); + } + else + { + logger.info("Record exists, no need to insert again for host id: {}", hostId); + } + } + catch (Exception e) + { + logger.error("Exception in inserting new repair history:", e); + } + } + + public static void insertNewRepairHistory(RepairType repairType, long startTime, long finishTime) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + insertNewRepairHistory(repairType, hostId, startTime, finishTime); + } + + public static void addHostIdToDeleteHosts(RepairType repairType, UUID myID, UUID hostToBeDeleted) + { + SetSerializer serializer = SetSerializer.getInstance(UUIDSerializer.instance, UTF8Type.instance.comparatorSet); + addHostIDToDeleteHostsStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(serializer.serialize(new HashSet<>(Arrays.asList(myID))), + ByteBufferUtil.bytes(System.currentTimeMillis()), + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostToBeDeleted) + )), Dispatcher.RequestTime.forImmediateExecution()); + } + + 
public static void addPriorityHosts(RepairType repairType, Set<InetAddressAndPort> hosts) + { + Set<UUID> hostIds = new HashSet<>(); + for (InetAddressAndPort host : hosts) + { + //find hostId from IP address + UUID hostId = ClusterMetadata.current().directory.hostId(ClusterMetadata.current().directory.peerId(host)); + //only resolved hosts are queued: a null id must never reach the serialized priority set + if (hostId != null) + { + logger.info("Add host {} to the priority list", hostId); + hostIds.add(hostId); + } + } + if (hostIds.size() > 0) + { + SetSerializer<UUID> serializer = SetSerializer.getInstance(UUIDSerializer.instance, UTF8Type.instance.comparatorSet); + addPriorityHost.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(serializer.serialize(hostIds), + ByteBufferUtil.bytes(repairType.toString()))), + Dispatcher.RequestTime.forImmediateExecution()); + } + } + + static void removePriorityStatus(RepairType repairType, UUID hostId) + { + logger.info("Remove host {} from priority list", hostId); + delStatementPriorityStatus.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(hostId), + ByteBufferUtil.bytes(repairType.toString()))), + Dispatcher.RequestTime.forImmediateExecution()); + } + + public static Set<UUID> getPriorityHostIds(RepairType repairType) + { + UntypedResultSet repairPriorityResult; + + ResultMessage.Rows repairPriorityRows = selectStatementRepairPriority.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()))), Dispatcher.RequestTime.forImmediateExecution()); + repairPriorityResult = UntypedResultSet.create(repairPriorityRows.result); + + Set<UUID> priorities = null; + if (repairPriorityResult.size() > 0) + { + // there should be only one row + UntypedResultSet.Row row = repairPriorityResult.one(); + priorities = row.getSet(COL_REPAIR_PRIORITY, UUIDType.instance); +
} + if (priorities != null) + { + return priorities; + } + return Collections.emptySet(); + } + + public static Set getPriorityHosts(RepairType repairType) + { + Set hosts = new HashSet<>(); + for (UUID hostId : getPriorityHostIds(repairType)) + { + hosts.add(ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(hostId)).broadcastAddress); + //hosts.add(StorageService.instance.getTokenMetadata().getEndpointForHostId(hostId)); + } + return hosts; + } + + public static boolean checkNodeContainsKeyspaceReplica(Keyspace ks) + { + AbstractReplicationStrategy replicationStrategy = ks.getReplicationStrategy(); + boolean ksReplicaOnNode = true; + if (replicationStrategy instanceof NetworkTopologyStrategy) + { + Set datacenters = ((NetworkTopologyStrategy) replicationStrategy).getDatacenters(); + String localDC = DatabaseDescriptor.getLocator().location(FBUtilities.getBroadcastAddressAndPort()).datacenter; + if (!datacenters.contains(localDC)) + { + ksReplicaOnNode = false; + } + } + if (replicationStrategy instanceof LocalStrategy) + { + ksReplicaOnNode = false; + } + return ksReplicaOnNode; + } + + + public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long startTime) + { + long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds + (System.currentTimeMillis() - startTime); + return AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() < + tableRepairTimeSoFar; + } + + public static boolean keyspaceMaxRepairTimeExceeded(RepairType repairType, long startTime, int numOfTablesToBeRepaired) + { + long keyspaceRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds((System.currentTimeMillis() - startTime)); + return (long) AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() * + numOfTablesToBeRepaired < keyspaceRepairTimeSoFar; + } + + public static List getAllMVs(RepairType repairType, Keyspace keyspace, TableMetadata tableMetadata) + { + List 
allMvs = new ArrayList<>(); + if (AutoRepairService.instance.getAutoRepairConfig().getMVRepairEnabled(repairType) && keyspace.getMetadata().views != null) + { + Iterator views = keyspace.getMetadata().views.forTable(tableMetadata.id).iterator(); + while (views.hasNext()) + { + String viewName = views.next().name(); + logger.info("Adding MV to the list {}.{}.{}", keyspace.getName(), tableMetadata.name, viewName); + allMvs.add(viewName); + } + } + return allMvs; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java new file mode 100644 index 000000000000..18231fff2af0 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.cassandra.repair.autorepair; + + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.cassandra.service.AutoRepairService; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.Pair; + +public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter +{ + private static final Logger logger = LoggerFactory.getLogger(DefaultAutoRepairTokenSplitter.class); + + + @Override + public List<Pair<Token, Token>> getRange(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, String tableName) + { + List<Pair<Token, Token>> range = new ArrayList<>(); + + Collection<Range<Token>> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); + if (!primaryRangeOnly) + { + // if we need to repair non-primary token ranges, then change the tokens accordingly + tokens = StorageService.instance.getLocalReplicas(keyspaceName).ranges(); + } + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + int numberOfSubranges = Math.max(1, config.getRepairSubRangeNum(repairType)); // 0 would divide by zero below, a negative value would yield no ranges + for (Range<Token> token : tokens) + { + Murmur3Partitioner.LongToken l = (Murmur3Partitioner.LongToken) (token.left); + Murmur3Partitioner.LongToken r = (Murmur3Partitioner.LongToken) (token.right); + // NOTE(review): the casts above assume Murmur3Partitioner tokens; any other partitioner fails here with ClassCastException + + Token parentStartToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + l.getTokenValue()); + Token parentEndToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + r.getTokenValue()); + logger.debug("Parent Token Left side {}, right side {}", parentStartToken.toString(), + parentEndToken.toString()); + + long left =
(Long) l.getTokenValue(); + long right = (Long) r.getTokenValue(); + long repairTokenWidth = Long.divideUnsigned(right - left, numberOfSubranges); // unsigned ring distance: a wrap-around or very wide range must not become a negative width + for (int i = 0; i < numberOfSubranges; i++) + { + long curLeft = left + (i * repairTokenWidth); + long curRight = curLeft + repairTokenWidth; + + if ((i + 1) == numberOfSubranges) + { + curRight = right; + } + + Token childStartToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curLeft); + Token childEndToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curRight); + logger.debug("Current Token Left side {}, right side {}", childStartToken + .toString(), childEndToken.toString()); + range.add(Pair.create(childStartToken, childEndToken)); + } + } + return range; + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java new file mode 100644 index 000000000000..2b69898a360d --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.cassandra.repair.autorepair; + + +import java.util.List; + +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.utils.Pair; + +public interface IAutoRepairTokenRangeSplitter +{ + // split the token range you wish to repair into multiple subranges + // the autorepair framework will repair the list of returned subrange in a sequence + List> getRange(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, String tableName); +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java new file mode 100644 index 000000000000..50f482caf1b7 --- /dev/null +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.cassandra.schema; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.ImmutableMap; +import org.apache.commons.lang3.StringUtils; + +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; + +import static java.lang.String.format; + +public final class AutoRepairParams +{ + public enum Option + { + ENABLED; + + @Override + public String toString() + { + return name().toLowerCase(); + } + } + + public static final Map<AutoRepairConfig.RepairType, Map<String, String>> DEFAULT_OPTIONS = + ImmutableMap.of(AutoRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), + AutoRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); + + public final AutoRepairConfig.RepairType type; + + private Map<AutoRepairConfig.RepairType, Map<String, String>> options = DEFAULT_OPTIONS; + + AutoRepairParams(AutoRepairConfig.RepairType type) + { + this.type = type; + } + + public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Map<String, String> options) + { + Map<AutoRepairConfig.RepairType, Map<String, String>> optionsMap = new HashMap<>(); + for (Map.Entry<AutoRepairConfig.RepairType, Map<String, String>> entry : DEFAULT_OPTIONS.entrySet()) + optionsMap.put(entry.getKey(), new HashMap<>(entry.getValue())); + // NOTE(review): a null map is treated as "no overrides"; presumably it occurs when a schema row has no value for the column - confirm + Map<String, String> overrides = options == null ? new HashMap<>() : options; + for (Map.Entry<String, String> entry : overrides.entrySet()) + { + // keys are matched case-insensitively, so store them normalized: repairEnabled() and validate() look up the lower-case name + String key = entry.getKey().toLowerCase(); + if (!Option.ENABLED.toString().equals(key)) + throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); + optionsMap.get(repairType).put(key, entry.getValue()); + } + AutoRepairParams repairParams = new AutoRepairParams(repairType); + repairParams.options = optionsMap; + return repairParams; + } + + public boolean repairEnabled() + { + String enabled = options.get(type).get(Option.ENABLED.toString()); + return enabled == null + ?
Boolean.parseBoolean(DEFAULT_OPTIONS.get(type).get(Option.ENABLED.toString())) + : Boolean.parseBoolean(enabled); + } + + public void validate() + { + String enabled = options.get(type).get(Option.ENABLED.toString()); + if (enabled != null && !isValidBoolean(enabled)) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", + enabled, + Option.ENABLED)); + } + } + + public static boolean isValidBoolean(String value) + { + return StringUtils.equalsIgnoreCase(value, "true") || StringUtils.equalsIgnoreCase(value, "false"); + } + + public Map options() + { + return options.get(type); + } + + public static AutoRepairParams fromMap(AutoRepairConfig.RepairType repairType, Map map) + { + return create(repairType, map); + } + + public Map asMap() + { + return options.get(type); + } + + @Override + public String toString() + { + return MoreObjects.toStringHelper(this) + .add("options", options) + .toString(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) + return true; + + if (!(o instanceof AutoRepairParams)) + return false; + + AutoRepairParams cp = (AutoRepairParams) o; + + return options.equals(cp.options); + } + + @Override + public int hashCode() + { + return Objects.hash(options); + } +} diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 393e0ba75e5c..372da4a92a9d 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -45,6 +45,7 @@ import org.apache.cassandra.db.rows.*; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.schema.ColumnMetadata.ClusteringOrder; import 
org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; @@ -132,6 +133,8 @@ private SchemaKeyspace() + "cdc boolean," + "read_repair text," + "fast_path frozen>," + + "automated_repair_full frozen>," + + "automated_repair_incremental frozen>," + "PRIMARY KEY ((keyspace_name), table_name))"); private static final TableMetadata Columns = @@ -216,6 +219,8 @@ private SchemaKeyspace() + "additional_write_policy text," + "cdc boolean," + "read_repair text," + + "automated_repair_full frozen>," + + "automated_repair_incremental frozen>," + "PRIMARY KEY ((keyspace_name), view_name))"); private static final TableMetadata Indexes = @@ -592,7 +597,10 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compaction", params.compaction.asMap()) .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) - .add("extensions", params.extensions); + .add("extensions", params.extensions) + .add("automated_repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .add("automated_repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ // node sends table schema to a < 3.8 versioned node with an unknown column. 
@@ -1081,7 +1089,9 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) SpeculativeRetryPolicy.fromString("99PERCENTILE")) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) - .fastPath(getFastPathStrategy(row)); + .fastPath(getFastPathStrategy(row)) + .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("automated_repair_full"))) + .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("automated_repair_incremental"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 7c3e52943c8c..ee512841869e 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; @@ -36,6 +37,9 @@ import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -98,7 +102,10 @@ public enum Option FAST_PATH, TRANSACTIONAL_MODE, TRANSACTIONAL_MIGRATION_FROM, - PENDING_DROP; + PENDING_DROP, + AUTOMATED_REPAIR_FULL, + AUTOMATED_REPAIR_INCREMENTAL, + ; @Override public String toString() @@ -131,6 +138,8 @@ 
public String toString() public final TransactionalMigrationFromMode transactionalMigrationFrom; public final boolean pendingDrop; + public final Map automatedRepair; + private TableParams(Builder builder) { comment = builder.comment; @@ -159,6 +168,13 @@ private TableParams(Builder builder) transactionalMigrationFrom = builder.transactionalMigrationFrom; pendingDrop = builder.pendingDrop; checkNotNull(transactionalMigrationFrom); + automatedRepair = new HashMap<>() + { + { + put(AutoRepairConfig.RepairType.full, builder.automatedRepairFull); + put(AutoRepairConfig.RepairType.incremental, builder.automatedRepairIncremental); + } + }; } public static Builder builder() @@ -190,7 +206,10 @@ public static Builder builder(TableParams params) .fastPath(params.fastPath) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) - .pendingDrop(params.pendingDrop); + .pendingDrop(params.pendingDrop) + .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.full)) + .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.incremental)) + ; } public Builder unbuild() @@ -204,7 +223,7 @@ public void validate() compression.validate(); double minBloomFilterFpChanceValue = BloomCalculations.minSupportedBloomFilterFpChance(); - if (bloomFilterFpChance <= minBloomFilterFpChanceValue || bloomFilterFpChance > 1) + if (bloomFilterFpChance <= minBloomFilterFpChanceValue || bloomFilterFpChance > 1) { fail("%s must be larger than %s and less than or equal to 1.0 (got %s)", BLOOM_FILTER_FP_CHANCE, @@ -248,6 +267,11 @@ public void validate() if (transactionalMode.isTestMode() && !CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getBoolean()) fail("Transactional mode " + transactionalMode + " can't be used if " + CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getKey() + " is not set"); + + for (Map.Entry entry : automatedRepair.entrySet()) + { + entry.getValue().validate(); + } } private 
static void fail(String format, Object... args) @@ -288,7 +312,8 @@ public boolean equals(Object o) && fastPath.equals(fastPath) && transactionalMode == p.transactionalMode && transactionalMigrationFrom == p.transactionalMigrationFrom - && pendingDrop == p.pendingDrop; + && pendingDrop == p.pendingDrop + && automatedRepair.equals(p.automatedRepair); // value equality: every TableParams builds its own map instance } @Override @@ -316,7 +341,8 @@ public int hashCode() fastPath, transactionalMode, transactionalMigrationFrom, - pendingDrop); + pendingDrop, + automatedRepair); } @Override @@ -347,6 +373,8 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) + .add(Option.AUTOMATED_REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) + .add(Option.AUTOMATED_REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.incremental)) .toString(); } @@ -407,7 +435,11 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) .newLine(); } - builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); + builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) + .newLine() + .append("AND automated_repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .newLine() + .append("AND automated_repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); } public static final class Builder @@ -436,6 +468,9 @@ public static final class Builder public TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; public boolean pendingDrop = false; + private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.full); + private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.incremental); +
public Builder() { } @@ -582,6 +617,18 @@ public Builder pendingDrop(boolean pendingDrop) this.pendingDrop = pendingDrop; return this; } + + public Builder automatedRepairFull(AutoRepairParams val) + { + automatedRepairFull = val; + return this; + } + + public Builder automatedRepairIncremental(AutoRepairParams val) + { + automatedRepairIncremental = val; + return this; + } } public static class Serializer implements MetadataSerializer diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index e507f129d4d2..694a5af1a4bb 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -50,6 +50,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -104,6 +105,7 @@ import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; import org.apache.cassandra.service.paxos.PaxosRepair; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.service.snapshot.SnapshotManager; @@ -669,6 +671,9 @@ public boolean verifyCompactionsPendingThreshold(TimeUUID parentRepairSession, P public Future prepareForRepair(TimeUUID parentRepairSession, InetAddressAndPort coordinator, Set endpoints, RepairOption options, boolean isForcedRepair, List columnFamilyStores) { + if (!verifyDiskHeadroomThreshold(parentRepairSession, options.getPreviewKind(), options.isIncremental())) + failRepair(parentRepairSession, "Rejecting incoming repair, disk usage above threshold"); // failRepair throws exception + if (!verifyCompactionsPendingThreshold(parentRepairSession, options.getPreviewKind())) 
failRepair(parentRepairSession, "Rejecting incoming repair, pending compactions above threshold"); // failRepair throws exception @@ -726,6 +731,24 @@ public Future prepareForRepair(TimeUUID parentRepairSession, InetAddressAndPo return promise; } + public static boolean verifyDiskHeadroomThreshold(TimeUUID parentRepairSession, PreviewKind previewKind, boolean isIncremental) + { + if (!isIncremental) // disk headroom is required for anti-compaction which is only performed by incremental repair + return true; + + double diskUsage = DiskUsageMonitor.instance.getDiskUsage(); + double rejectRatio = ActiveRepairService.instance().getIncrementalRepairDiskHeadroomRejectRatio(); + + if (diskUsage + rejectRatio > 1) + { + logger.error("[{}] Rejecting incoming repair, disk usage ({}%) above threshold ({}%)", + previewKind.logPrefix(parentRepairSession), String.format("%.2f", diskUsage * 100), String.format("%.2f", (1 - rejectRatio) * 100)); + return false; + } + + return true; + } + private void sendPrepareWithRetries(TimeUUID parentRepairSession, AtomicInteger pending, Set failedNodes, @@ -1086,6 +1109,16 @@ public void setRepairPendingCompactionRejectThreshold(int value) DatabaseDescriptor.setRepairPendingCompactionRejectThreshold(value); } + public double getIncrementalRepairDiskHeadroomRejectRatio() + { + return DatabaseDescriptor.getIncrementalRepairDiskHeadroomRejectRatio(); + } + + public void setIncrementalRepairDiskHeadroomRejectRatio(double value) + { + DatabaseDescriptor.setIncrementalRepairDiskHeadroomRejectRatio(value); + } + /** * Remove any parent repair sessions matching predicate */ diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java new file mode 100644 index 000000000000..b07d770fb2de --- /dev/null +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.utils.MBeanWrapper; + +import java.util.Set; + +import com.google.common.annotations.VisibleForTesting; + +public class AutoRepairService implements AutoRepairServiceMBean +{ + public static final String MBEAN_NAME = "org.apache.cassandra.db:type=AutoRepairService"; + + @VisibleForTesting + protected AutoRepairConfig config; + + public static final AutoRepairService instance = new AutoRepairService(); + + @VisibleForTesting + protected AutoRepairService() + { + } + + public static void setup() + { + instance.config = DatabaseDescriptor.getAutoRepairConfig(); + } + + static + { + MBeanWrapper.instance.registerMBean(instance, MBEAN_NAME); + } + + @Override + public AutoRepairConfig getAutoRepairConfig() + { + return config; + } + + @Override + public void setAutoRepairEnabled(RepairType repairType, boolean enabled) + { + config.setAutoRepairEnabled(repairType, enabled); + } + 
+ @Override + public void setRepairThreads(RepairType repairType, int repairThreads) + { + config.setRepairThreads(repairType, repairThreads); + } + + @Override + public void setRepairPriorityForHosts(RepairType repairType, Set hosts) + { + AutoRepairUtils.addPriorityHosts(repairType, hosts); + } + + @Override + public Set getRepairHostPriority(RepairType repairType) { + return AutoRepairUtils.getPriorityHosts(repairType); + } + + @Override + public void setForceRepairForHosts(RepairType repairType, Set hosts) + { + AutoRepairUtils.setForceRepair(repairType, hosts); + } + + @Override + public void setRepairSubRangeNum(RepairType repairType, int repairSubRanges) + { + config.setRepairSubRangeNum(repairType, repairSubRanges); + } + + @Override + public void setRepairMinInterval(RepairType repairType, String minRepairInterval) + { + config.setRepairMinInterval(repairType, minRepairInterval); + } + + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) + { + config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); + } + + @Override + public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) + { + config.setRepairSSTableCountHigherThreshold(repairType, sstableHigherThreshold); + } + + @Override + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) + { + config.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); + } + + @Override + public void setIgnoreDCs(RepairType repairType, Set ignoreDCs) + { + config.setIgnoreDCs(repairType, ignoreDCs); + } + + @Override + public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly) + { + config.setRepairPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); + } + + @Override + public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup) + { + config.setParallelRepairPercentageInGroup(repairType, 
percentageInGroup); + } + + @Override + public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup) + { + config.setParallelRepairCountInGroup(repairType, countInGroup); + } + + public void setMVRepairEnabled(RepairType repairType, boolean enabled) + { + config.setMVRepairEnabled(repairType, enabled); + } +} diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java new file mode 100644 index 000000000000..5dda1b157517 --- /dev/null +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.cassandra.service; + +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import java.util.Set; + +public interface AutoRepairServiceMBean +{ + /** + * Enable or disable auto-repair for a given repair type + */ + public void setAutoRepairEnabled(RepairType repairType, boolean enabled); + + public void setRepairThreads(RepairType repairType, int repairThreads); + + public void setRepairPriorityForHosts(RepairType repairType, Set host); + + public void setForceRepairForHosts(RepairType repairType, Set host); + + public Set getRepairHostPriority(RepairType repairType); + + public void setRepairSubRangeNum(RepairType repairType, int repairSubRangeNum); + + public void setRepairMinInterval(RepairType repairType, String minRepairInterval); + + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); + + public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); + + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); + public void setIgnoreDCs(RepairType repairType, Set ignorDCs); + + public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly); + + public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup); + public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup); + + public void setMVRepairEnabled(RepairType repairType, boolean enabled); + + public AutoRepairConfig getAutoRepairConfig(); +} diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index d3c787d2e2db..69ee2c2c5d67 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ 
-403,6 +403,8 @@ protected void setup() AuditLogManager.instance.initialize(); + StorageService.instance.doAutoRepairSetup(); + // schedule periodic background compaction task submission. this is simply a backstop against compactions stalling // due to scheduling errors or race conditions ScheduledExecutors.optionalTasks.scheduleWithFixedDelay(ColumnFamilyStore.getBackgroundCompactionTaskSubmitter(), 5, 1, TimeUnit.MINUTES); diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index c97b2ec69523..bb8a56ba00ee 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -71,6 +71,8 @@ import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; +import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -469,7 +471,7 @@ public enum Mode { STARTING, NORMAL, JOINING, JOINING_FAILED, LEAVING, DECOMMISS private volatile int totalCFs, remainingCFs; - private static final AtomicInteger nextRepairCommand = new AtomicInteger(); + public static final AtomicInteger nextRepairCommand = new AtomicInteger(); private final List lifecycleSubscribers = new CopyOnWriteArrayList<>(); @@ -1128,6 +1130,17 @@ public void doAuthSetup(boolean async) } } + public void doAutoRepairSetup() + { + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + logger.info("Enable auto-repair scheduling"); + AutoRepair.instance.setup(); + } + logger.info("AutoRepair setup complete!"); + } + + public boolean isAuthSetupComplete() { return authSetupComplete; diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 
37b7ce627ae2..2cde23e82467 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -105,13 +105,17 @@ import org.apache.cassandra.locator.EndpointSnitchInfoMBean; import org.apache.cassandra.locator.LocationInfoMBean; import org.apache.cassandra.metrics.CIDRAuthorizerMetrics; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.CassandraMetricsRegistry; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.metrics.TableMetrics; import org.apache.cassandra.metrics.ThreadPoolMetrics; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.MessagingServiceMBean; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.ActiveRepairServiceMBean; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.AutoRepairServiceMBean; import org.apache.cassandra.service.CacheService; import org.apache.cassandra.service.CacheServiceMBean; import org.apache.cassandra.service.snapshot.SnapshotManagerMBean; @@ -179,6 +183,8 @@ public class NodeProbe implements AutoCloseable protected CIDRGroupsMappingManagerMBean cmbProxy; protected PermissionsCacheMBean pcProxy; protected RolesCacheMBean rcProxy; + protected AutoRepairServiceMBean autoRepairProxy; + protected Output output; private boolean failed; @@ -323,6 +329,9 @@ protected void connect() throws IOException name = new ObjectName(CIDRFilteringMetricsTable.MBEAN_NAME); cfmProxy = JMX.newMBeanProxy(mbeanServerConn, name, CIDRFilteringMetricsTableMBean.class); + + name = new ObjectName(AutoRepairService.MBEAN_NAME); + autoRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name, AutoRepairServiceMBean.class); } catch (MalformedObjectNameException e) { @@ -2553,6 +2562,82 @@ public void abortBootstrap(String nodeId, String endpoint) { ssProxy.abortBootstrap(nodeId, endpoint); } + + public 
AutoRepairConfig getAutoRepairConfig() { + return autoRepairProxy.getAutoRepairConfig(); + } + + public void setAutoRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + { + autoRepairProxy.setAutoRepairEnabled(repairType, enabled); + } + + public void setRepairThreads(AutoRepairConfig.RepairType repairType, int repairThreads) + { + autoRepairProxy.setRepairThreads(repairType, repairThreads); + } + + public void setRepairPriorityForHosts(AutoRepairConfig.RepairType repairType, Set hosts) + { + autoRepairProxy.setRepairPriorityForHosts(repairType, hosts); + } + + public Set getRepairPriorityForHosts(AutoRepairConfig.RepairType repairType) + { + return autoRepairProxy.getRepairHostPriority(repairType); + } + + public void setForceRepairForHosts(AutoRepairConfig.RepairType repairType, Set hosts){ + autoRepairProxy.setForceRepairForHosts(repairType, hosts); + } + + public void setRepairSubRangeNum(AutoRepairConfig.RepairType repairType, int repairSubRanges) + { + autoRepairProxy.setRepairSubRangeNum(repairType, repairSubRanges); + } + + public void setRepairMinInterval(AutoRepairConfig.RepairType repairType, String minRepairInterval) + { + autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); + } + + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) + { + autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); + } + + public void setRepairSSTableCountHigherThreshold(AutoRepairConfig.RepairType repairType, int ssTableHigherThreshold) + { + autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); + } + + public void setAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType repairType, String autoRepairTableMaxRepairTime) + { + autoRepairProxy.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); + } + + public void setAutoRepairIgnoreDCs(AutoRepairConfig.RepairType repairType, Set ignoreDCs) + { + 
autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); + } + + public void setParallelRepairPercentageInGroup(AutoRepairConfig.RepairType repairType, int percentageInGroup) { + autoRepairProxy.setParallelRepairPercentageInGroup(repairType, percentageInGroup); + } + + public void setParallelRepairCountInGroup(AutoRepairConfig.RepairType repairType, int countInGroup) { + autoRepairProxy.setParallelRepairCountInGroup(repairType, countInGroup); + } + + public void setPrimaryTokenRangeOnly(AutoRepairConfig.RepairType repairType, boolean primaryTokenRangeOnly) + { + autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); + } + + public void setMVRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + { + autoRepairProxy.setMVRepairEnabled(repairType, enabled); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 2823bf112b03..ff49a9958de1 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -133,6 +133,7 @@ public int execute(String... args) GcStats.class, GetAuditLog.class, GetAuthCacheConfig.class, + GetAutoRepairConfig.class, GetBatchlogReplayTrottle.class, GetCIDRGroupsOfIP.class, GetColumnIndexSize.class, @@ -197,6 +198,7 @@ public int execute(String... 
args) Ring.class, Scrub.class, SetAuthCacheConfig.class, + SetAutoRepairConfig.class, SetBatchlogReplayThrottle.class, SetCacheCapacity.class, SetCacheKeysToSave.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java new file mode 100644 index 000000000000..49ccf0f2aa24 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.cassandra.tools.nodetool; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import io.airlift.airline.Command; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool.NodeToolCmd; + +import java.io.PrintStream; + +@Command(name = "getautorepairconfig", description = "Print autorepair configurations") +public class GetAutoRepairConfig extends NodeToolCmd +{ + @VisibleForTesting + protected static PrintStream out = System.out; + + @Override + public void execute(NodeProbe probe) + { + AutoRepairConfig config = probe.getAutoRepairConfig(); + if (config == null || !config.isAutoRepairSchedulingEnabled()) + { + out.println("Auto-repair is not enabled"); + return; + } + + StringBuilder sb = new StringBuilder(); + sb.append("repair scheduler configuration:"); + sb.append("\n\trepair eligibility check interval: " + config.getRepairCheckInterval()); + sb.append("\n\tTTL for repair history for dead nodes: " + config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); + for (RepairType repairType : RepairType.values()) + { + sb.append(formatRepairTypeConfig(probe, repairType, config)); + } + + out.println(sb); + } + + private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, AutoRepairConfig config) + { + StringBuilder sb = new StringBuilder(); + sb.append("\nconfiguration for repair type: " + repairType); + sb.append("\n\tenabled: " + config.isAutoRepairEnabled(repairType)); + sb.append("\n\tminimum repair interval: " + config.getRepairMinInterval(repairType)); + sb.append("\n\trepair threads: " + config.getRepairThreads(repairType)); + sb.append("\n\tnumber of repair subranges: " + config.getRepairSubRangeNum(repairType)); + sb.append("\n\tpriority hosts: " + 
Joiner.on(',').skipNulls().join(probe.getRepairPriorityForHosts(repairType))); + sb.append("\n\tsstable count higher threshold: " + config.getRepairSSTableCountHigherThreshold(repairType)); + sb.append("\n\ttable max repair time in sec: " + config.getAutoRepairTableMaxRepairTime(repairType)); + sb.append("\n\tignore datacenters: " + Joiner.on(',').skipNulls().join(config.getIgnoreDCs(repairType))); + sb.append("\n\trepair primary token-range: " + config.getRepairPrimaryTokenRangeOnly(repairType)); + sb.append("\n\tnumber of parallel repairs within group: " + config.getParallelRepairCountInGroup(repairType)); + sb.append("\n\tpercentage of parallel repairs within group: " + config.getParallelRepairPercentageInGroup(repairType)); + sb.append("\n\tmv repair enabled: " + config.getMVRepairEnabled(repairType)); + + return sb.toString(); + } +} diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java new file mode 100644 index 000000000000..bd00809dcf22 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.cassandra.tools.nodetool; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Splitter; +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool.NodeToolCmd; + +import java.io.PrintStream; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static com.google.common.base.Preconditions.checkArgument; + +@Command(name = "setautorepairconfig", description = "sets the autorepair configuration") +public class SetAutoRepairConfig extends NodeToolCmd +{ + @VisibleForTesting + @Arguments(title = " ", usage = " ", + description = "autorepair param and value.\nPossible autorepair parameters are as following: " + + "[number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only|parallel_repair_count_in_group|parallel_repair_percentage_in_group|mv_repair_enabled]", + required = true) + protected List args = new ArrayList<>(); + + @VisibleForTesting + @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") + protected RepairType repairType; + + @VisibleForTesting + protected PrintStream out = System.out; + + @Override + public void execute(NodeProbe probe) + { + checkArgument(repairType != null, "--repair-type is required."); + checkArgument(args.size() == 2, "setautorepairconfig requires param-type, and value args."); + String paramType = args.get(0); + String paramVal = args.get(1); + + if 
(!probe.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + out.println("Auto-repair is not enabled"); + return; + } + + if (paramType.equals("history_clear_delete_hosts_buffer_interval")) + { + probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); + return; + } + + // options below require --repair-type option + checkArgument(repairType != null, "--repair-type is required for this parameter."); + Set hosts; + switch (paramType) + { + case "enabled": + probe.setAutoRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + break; + case "number_of_repair_threads": + probe.setRepairThreads(repairType, Integer.parseInt(paramVal)); + break; + case "number_of_subranges": + probe.setRepairSubRangeNum(repairType, Integer.parseInt(paramVal)); + break; + case "min_repair_interval": + probe.setRepairMinInterval(repairType, paramVal); + break; + case "sstable_upper_threshold": + probe.setRepairSSTableCountHigherThreshold(repairType, Integer.parseInt(paramVal)); + break; + case "table_max_repair_time": + probe.setAutoRepairTableMaxRepairTime(repairType, paramVal); + break; + case "priority_hosts": + hosts = validateLocalGroupHosts(probe, repairType, paramVal); + if (!hosts.isEmpty()) + { + probe.setRepairPriorityForHosts(repairType, hosts); + } + break; + case "forcerepair_hosts": + hosts = validateLocalGroupHosts(probe, repairType, paramVal); + if (!hosts.isEmpty()) + { + probe.setForceRepairForHosts(repairType, hosts); + } + break; + case "ignore_dcs": + Set ignoreDCs = new HashSet<>(); + for (String dc : Splitter.on(',').split(paramVal)) + { + ignoreDCs.add(dc); + } + probe.setAutoRepairIgnoreDCs(repairType, ignoreDCs); + break; + case "repair_primary_token_range_only": + probe.setPrimaryTokenRangeOnly(repairType, Boolean.parseBoolean(paramVal)); + break; + case "parallel_repair_count_in_group": + probe.setParallelRepairCountInGroup(repairType, Integer.parseInt(paramVal)); + break; + case "parallel_repair_percentage_in_group": + 
probe.setParallelRepairPercentageInGroup(repairType, Integer.parseInt(paramVal)); + break; + case "mv_repair_enabled": + probe.setMVRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + break; + default: + throw new IllegalArgumentException("Unknown parameter: " + paramType); + } + } + + private Set validateLocalGroupHosts(NodeProbe probe, RepairType repairType, String paramVal) { + Set hosts = new HashSet<>(); + for (String host : Splitter.on(',').split(paramVal)) + { + try + { + hosts.add(InetAddressAndPort.getByName(host)); + } + catch (UnknownHostException e) + { + out.println("invalid ip address: " + host); + } + } + + return hosts; + } +} diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index a4493f30a201..5f5cfb7b6aea 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -65,6 +65,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -729,6 +730,12 @@ public static AbstractCryptoProvider newCryptoProvider(String className, Map listSnapshots(ColumnFamilyStore cfs) return tagSnapshotsMap; } + // Replaces the global auto-repair config with a new config where auto-repair schedulling is enabled/disabled + public static void setAutoRepairEnabled(boolean enabled) throws Exception + { + Config config = DatabaseDescriptor.getRawConfig(); + config.auto_repair = new AutoRepairConfig(enabled); + Field configField = DatabaseDescriptor.class.getDeclaredField("conf"); + configField.setAccessible(true); + configField.set(null, config); + } } diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java 
b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 2c7a26026969..85433c1efe0d 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -32,6 +32,7 @@ import java.util.function.Predicate; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import org.junit.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -46,6 +47,8 @@ import static org.apache.cassandra.config.YamlConfigurationLoader.SYSTEM_PROPERTY_PREFIX; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -108,8 +111,8 @@ public void validateTypes() assertEquals("You have wrongly defined a config parameter of abstract type DurationSpec, DataStorageSpec or DataRateSpec." 
+ "Please check the config docs, otherwise Cassandra won't be able to start with this parameter being set in cassandra.yaml.", Arrays.stream(Config.class.getFields()) - .filter(f -> !Modifier.isStatic(f.getModifiers())) - .filter(isDurationSpec.or(isDataRateSpec).or(isDataStorageSpec)).count(), 0); + .filter(f -> !Modifier.isStatic(f.getModifiers())) + .filter(isDurationSpec.or(isDataRateSpec).or(isDataStorageSpec)).count(), 0); } @Test @@ -117,12 +120,12 @@ public void updateInPlace() { Config config = new Config(); Map map = ImmutableMap.builder().put("storage_port", 123) - .put("commitlog_sync", Config.CommitLogSync.batch) - .put("seed_provider.class_name", "org.apache.cassandra.locator.SimpleSeedProvider") - .put("client_encryption_options.cipher_suites", Collections.singletonList("FakeCipher")) - .put("client_encryption_options.optional", false) - .put("client_encryption_options.enabled", true) - .build(); + .put("commitlog_sync", Config.CommitLogSync.batch) + .put("seed_provider.class_name", "org.apache.cassandra.locator.SimpleSeedProvider") + .put("client_encryption_options.cipher_suites", Collections.singletonList("FakeCipher")) + .put("client_encryption_options.optional", false) + .put("client_encryption_options.enabled", true) + .build(); Config updated = YamlConfigurationLoader.updateFromMap(map, true, config); assert updated == config : "Config pointers do not match"; assertThat(config.storage_port).isEqualTo(123); @@ -275,6 +278,12 @@ public void fromMapTest() Map encryptionOptions = ImmutableMap.of("cipher_suites", Collections.singletonList("FakeCipher"), "optional", false, "enabled", true); + Map autoRepairConfig = ImmutableMap.of("enabled", true, + "global_settings", ImmutableMap.of("repair_dc_groups", + ImmutableSet.of("all the groups")), + "repair_type_overrides", ImmutableMap.of( + "full", ImmutableMap.of("repair_dc_groups", + ImmutableSet.of("none of the groups")))); Map map = new ImmutableMap.Builder() .put("storage_port", storagePort) 
.put("commitlog_sync", commitLogSync) @@ -283,6 +292,7 @@ public void fromMapTest() .put("internode_socket_send_buffer_size", "5B") .put("internode_socket_receive_buffer_size", "5B") .put("commitlog_sync_group_window_in_ms", "42") + .put("auto_repair", autoRepairConfig) .build(); Config config = YamlConfigurationLoader.fromMap(map, Config.class); @@ -293,6 +303,9 @@ public void fromMapTest() assertEquals(true, config.client_encryption_options.enabled); // Check a nested object assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) + assertEquals(true, config.auto_repair.enabled); + assertEquals(6 * 60 * 60L, config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.incremental)); + config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.incremental, false); } @Test @@ -553,4 +566,4 @@ public static Config load(String path) } return new YamlConfigurationLoader().loadConfig(url); } -} +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java new file mode 100644 index 000000000000..dea0c9a827c3 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -0,0 +1,453 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.EnumMap; +import java.util.Objects; +import java.util.Set; + +import com.google.common.collect.ImmutableSet; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.Options; +import org.apache.cassandra.utils.FBUtilities; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +@RunWith(Parameterized.class) +public class AutoRepairConfigTest extends CQLTester +{ + private AutoRepairConfig config; + + private Set testSet = ImmutableSet.of("dc1"); + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters + public static Object[] repairTypes() + { + return AutoRepairConfig.RepairType.values(); + } + + @Before + public void setUp() + { + config = new AutoRepairConfig(true); + config.repair_type_overrides = null; + } + + @Test + public void autoRepairConfigDefaultsAreNotNull() + { + AutoRepairConfig config = new AutoRepairConfig(); + assertNotNull(config.global_settings); + } + + @Test + public void 
autoRepairConfigRepairTypesAreNotNull() + { + AutoRepairConfig config = new AutoRepairConfig(); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + assertNotNull(config.repair_type_overrides.get(repairType)); + } + } + + @Test + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsEnabled() + { + config.global_settings.enabled = true; + + assertTrue(config.isAutoRepairEnabled(repairType)); + } + + @Test + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() + { + config = new AutoRepairConfig(false); + config.global_settings.enabled = true; + assertFalse(config.isAutoRepairEnabled(repairType)); + } + + + @Test + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() + { + config.global_settings.enabled = true; + config.repair_type_overrides = new EnumMap<>(AutoRepairConfig.RepairType.class); + config.repair_type_overrides.put(repairType, new Options()); + config.repair_type_overrides.get(repairType).enabled = false; + assertFalse(config.isAutoRepairEnabled(repairType)); + } + + @Test + public void testSetAutoRepairEnabledNoMVOrCDC() + { + DatabaseDescriptor.setCDCEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + config.setAutoRepairEnabled(repairType, true); + + assertTrue(config.repair_type_overrides.get(repairType).enabled); + } + + @Test + public void testSetAutoRepairEnabledWithMV() + { + DatabaseDescriptor.setCDCEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + + try + { + config.setAutoRepairEnabled(repairType, true); + + if (repairType == AutoRepairConfig.RepairType.incremental) + { + assertFalse(config.repair_type_overrides.get(repairType).enabled); // IR should not be allowed with MV + assertNotEquals(AutoRepairConfig.RepairType.incremental, repairType); // should receive exception + } + else + { + assertTrue(config.repair_type_overrides.get(repairType).enabled); + } + } + catch (ConfigurationException e) + { + // 
should throw only if repairType is incremental + assertEquals(AutoRepairConfig.RepairType.incremental, repairType); + } + } + + @Test + public void testSetAutoRepairEnabledWithCDC() + { + DatabaseDescriptor.setCDCEnabled(true); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + + try + { + config.setAutoRepairEnabled(repairType, true); + + + if (repairType == AutoRepairConfig.RepairType.incremental) + { + assertFalse(config.repair_type_overrides.get(repairType).enabled); // IR should not be allowed with CDC + assertNotEquals(AutoRepairConfig.RepairType.incremental, repairType); // should receive exception + } + else + { + assertTrue(config.repair_type_overrides.get(repairType).enabled); + } + } + catch (ConfigurationException e) + { + // should throw only if repairType is incremental + assertEquals(AutoRepairConfig.RepairType.incremental, repairType); + } + } + + + @Test + public void testSetRepairByKeyspace() + { + config.setRepairByKeyspace(repairType, true); + + assertTrue(config.repair_type_overrides.get(repairType).repair_by_keyspace); + } + + @Test + public void testGetRepairByKeyspace() + { + config.global_settings.repair_by_keyspace = true; + + boolean result = config.getRepairByKeyspace(repairType); + + assertTrue(result); + } + + @Test + public void testSetRepairThreads() + { + config.setRepairThreads(repairType, 5); + + assert config.repair_type_overrides.get(repairType).number_of_repair_threads == 5; + } + + @Test + public void testGetRepairThreads() + { + config.global_settings.number_of_repair_threads = 5; + + int result = config.getRepairThreads(repairType); + + assertEquals(5, result); + } + + @Test + public void testGetRepairSubRangeNum() + { + config.global_settings.number_of_subranges = 5; + + int result = config.getRepairSubRangeNum(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetRepairSubRangeNum() + { + config.setRepairSubRangeNum(repairType, 5); + + assert 
config.repair_type_overrides.get(repairType).number_of_subranges == 5; + } + + @Test + public void testGetRepairMinFrequencyInHours() + { + config.global_settings.min_repair_interval = new DurationSpec.IntSecondsBound("5s"); + + DurationSpec.IntSecondsBound result = config.getRepairMinInterval(repairType); + + assertEquals(5, result.toSeconds()); + } + + @Test + public void testSetRepairMinFrequencyInHours() + { + config.setRepairMinInterval(repairType, "5s"); + + assert config.repair_type_overrides.get(repairType).min_repair_interval.toSeconds() == 5; + } + + @Test + public void testGetAutoRepairHistoryClearDeleteHostsBufferInSec() + { + config.history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("5s"); + + int result = config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds(); + + assertEquals(5, result); + } + + @Test + public void testSetAutoRepairHistoryClearDeleteHostsBufferInSec() + { + config.setAutoRepairHistoryClearDeleteHostsBufferInterval("5s"); + + assert Objects.equals(config.history_clear_delete_hosts_buffer_interval, new DurationSpec.IntSecondsBound("5s")); + } + + @Test + public void testGetRepairSSTableCountHigherThreshold() + { + config.global_settings.sstable_upper_threshold = 5; + + int result = config.getRepairSSTableCountHigherThreshold(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetRepairSSTableCountHigherThreshold() + { + config.setRepairSSTableCountHigherThreshold(repairType, 5); + + assert config.repair_type_overrides.get(repairType).sstable_upper_threshold == 5; + } + + @Test + public void testGetAutoRepairTableMaxRepairTimeInSec() + { + config.global_settings.table_max_repair_time = new DurationSpec.IntSecondsBound("5s"); + + DurationSpec.IntSecondsBound result = config.getAutoRepairTableMaxRepairTime(repairType); + + assertEquals(5, result.toSeconds()); + } + + @Test + public void testSetAutoRepairTableMaxRepairTimeInSec() + { + 
config.setAutoRepairTableMaxRepairTime(repairType, "5s"); + + assert config.repair_type_overrides.get(repairType).table_max_repair_time.toSeconds() == 5; + } + + @Test + public void testGetIgnoreDCs() + { + config.global_settings.ignore_dcs = testSet; + + Set result = config.getIgnoreDCs(repairType); + + assertEquals(testSet, result); + } + + @Test + public void testSetIgnoreDCs() + { + config.setIgnoreDCs(repairType, testSet); + + assertEquals(config.repair_type_overrides.get(repairType).ignore_dcs, testSet); + } + + @Test + public void testGetRepairPrimaryTokenRangeOnly() + { + config.global_settings.repair_primary_token_range_only = true; + + boolean result = config.getRepairPrimaryTokenRangeOnly(repairType); + + assertTrue(result); + } + + @Test + public void testSetRepairPrimaryTokenRangeOnly() + { + config.setRepairPrimaryTokenRangeOnly(repairType, true); + + assertTrue(config.repair_type_overrides.get(repairType).repair_primary_token_range_only); + } + + @Test + public void testGetParallelRepairPercentageInGroup() + { + config.global_settings.parallel_repair_percentage_in_group = 5; + + int result = config.getParallelRepairPercentageInGroup(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetParallelRepairPercentageInGroup() + { + config.setParallelRepairPercentageInGroup(repairType, 5); + + assert config.repair_type_overrides.get(repairType).parallel_repair_percentage_in_group == 5; + } + + @Test + public void testGetParallelRepairCountInGroup() + { + config.global_settings.parallel_repair_count_in_group = 5; + + int result = config.getParallelRepairCountInGroup(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetParallelRepairCountInGroup() + { + config.setParallelRepairCountInGroup(repairType, 5); + + assert config.repair_type_overrides.get(repairType).parallel_repair_count_in_group == 5; + } + + @Test + public void testGetMVRepairEnabled() + { + config.global_settings.mv_repair_enabled = true; + + boolean 
result = config.getMVRepairEnabled(repairType); + + assertTrue(result); + } + + @Test + public void testSetMVRepairEnabled() + { + config.setMVRepairEnabled(repairType, true); + + assertTrue(config.repair_type_overrides.get(repairType).mv_repair_enabled); + } + + @Test + public void testSetForceRepairNewNode() + { + config.setForceRepairNewNode(repairType, true); + + assertTrue(config.repair_type_overrides.get(repairType).force_repair_new_node); + } + + @Test + public void testGetForceRepairNewNode() + { + config.global_settings.force_repair_new_node = true; + + boolean result = config.getForceRepairNewNode(repairType); + + assertTrue(result); + } + + @Test + public void testIsAutoRepairSchedulingEnabledDefault() + { + config = new AutoRepairConfig(); + + boolean result = config.isAutoRepairSchedulingEnabled(); + + assertFalse(result); + } + + @Test + public void testIsAutoRepairSchedulingEnabledTrue() + { + boolean result = config.isAutoRepairSchedulingEnabled(); + + assertTrue(result); + } + + @Test + public void testGetDefaultOptionsMVRepairIsEnabledByDefault() + { + Options defaultOptions = Options.getDefaultOptions(); + + assertTrue(defaultOptions.mv_repair_enabled); + } + + @Test + public void testGetDefaultOptionsTokenRangeSplitter() + { + Options defaultOptions = Options.getDefaultOptions(); + + assertEquals(DefaultAutoRepairTokenSplitter.class.getName(),defaultOptions.token_range_splitter); + assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); + } + + @Test(expected = ConfigurationException.class) + public void testInvalidTokenRangeSplitter() + { + assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter("invalid-class").getClass().getName()); + } + +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java new file mode 100644 index 000000000000..579b996a12f5 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Optional; +import java.util.Set; + +import com.google.common.collect.ImmutableSet; + +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableMetadata; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; + +public class AutoRepairKeyspaceTest +{ + private static final Set tables = ImmutableSet.of( + AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + AutoRepairKeyspace.AUTO_REPAIR_PRIORITY + ); + + @BeforeClass + public static void setupDatabaseDescriptor() + { + DatabaseDescriptor.daemonInitialization(); + } + + + @Test + public void testMetadataCanParseSchemas() throws Exception + { + setAutoRepairEnabled(true); + KeyspaceMetadata keyspaceMetadata = AutoRepairKeyspace.metadata(); + + assert keyspaceMetadata.tables.size() == tables.size() : "Expected " + tables.size() + " tables, got " + keyspaceMetadata.tables.size(); + + for (String table : tables) + { + Optional tableMetadata = keyspaceMetadata.tables.get(table); + + assert tableMetadata.isPresent() : "Table " + table + " not found in metadata"; + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java new file mode 100644 index 000000000000..0329454fb381 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -0,0 +1,583 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; + +import org.apache.cassandra.cql3.statements.schema.TableAttributes; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.schema.AutoRepairParams; +import org.apache.cassandra.schema.TableParams; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.Pair; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import junit.framework.Assert; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.IntegerType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import 
org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.token; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@RunWith(Parameterized.class) +public class AutoRepairParameterizedTest extends CQLTester +{ + private static final String KEYSPACE = "ks"; + private static final String TABLE = "tbl"; + private static final String TABLE_DISABLED_AUTO_REPAIR = "tbl_disabled_auto_repair"; + private static final String MV = "mv"; + private static TableMetadata cfm; + private static TableMetadata cfmDisabledAutoRepair; + private static Keyspace keyspace; + private static int timeFuncCalls; + @Mock + ScheduledExecutorPlus mockExecutor; + @Mock + ProgressEvent progressEvent; + @Mock + AutoRepairState autoRepairState; + @Mock + 
RepairCoordinator repairRunnable; + private static AutoRepairConfig defaultConfig; + + + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + @BeforeClass + public static void setupClass() throws Exception + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + setAutoRepairEnabled(true); + requireNetwork(); + AutoRepairUtils.setup(); + + + cfm = TableMetadata.builder(KEYSPACE, TABLE) + .addPartitionKeyColumn("k", UTF8Type.instance) + .addStaticColumn("s", UTF8Type.instance) + .addClusteringColumn("i", IntegerType.instance) + .addRegularColumn("v", UTF8Type.instance) + .params(TableParams.builder().automatedRepairFull(AutoRepairParams.create(AutoRepairConfig.RepairType.full, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(true)))). + automatedRepairIncremental(AutoRepairParams.create(AutoRepairConfig.RepairType.incremental, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(true)))).build()) + .build(); + + cfmDisabledAutoRepair = TableMetadata.builder(KEYSPACE, TABLE_DISABLED_AUTO_REPAIR) + .addPartitionKeyColumn("k", UTF8Type.instance) + .addStaticColumn("s", UTF8Type.instance) + .addClusteringColumn("i", IntegerType.instance) + .addRegularColumn("v", UTF8Type.instance) + .params(TableParams.builder().automatedRepairFull(AutoRepairParams.create(AutoRepairConfig.RepairType.full, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(false)))). 
+ automatedRepairIncremental(AutoRepairParams.create(AutoRepairConfig.RepairType.incremental, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(false)))).build()) + .build(); + + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace(KEYSPACE, KeyspaceParams.simple(1), cfm, cfmDisabledAutoRepair); + cfm = Schema.instance.getTableMetadata(KEYSPACE, TABLE); + cfmDisabledAutoRepair = Schema.instance.getTableMetadata(KEYSPACE, TABLE_DISABLED_AUTO_REPAIR); + keyspace = Keyspace.open(KEYSPACE); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); + + defaultConfig = new AutoRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + defaultConfig.setAutoRepairEnabled(repairType, true); + defaultConfig.setMVRepairEnabled(repairType, false); + } + } + + @Before + public void setup() + { + MockitoAnnotations.initMocks(this); + + Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); + Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).disableAutoCompaction(); + + Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).truncateBlocking(); + Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).disableAutoCompaction(); + + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(AutoRepairKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(AutoRepairKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); + + + AutoRepair.instance = new AutoRepair(); + executeCQL(); + + timeFuncCalls = 0; + AutoRepair.timeFunc = System::currentTimeMillis; + resetCounters(); + resetConfig(); + } + + + private 
void resetCounters() + { + AutoRepairMetrics metrics = AutoRepairMetricsManager.getMetrics(repairType); + Metrics.removeMatching((name, metric) -> name.startsWith("repairTurn")); + metrics.repairTurnMyTurn = Metrics.counter(String.format("repairTurnMyTurn-%s", repairType)); + metrics.repairTurnMyTurnForceRepair = Metrics.counter(String.format("repairTurnMyTurnForceRepair-%s", repairType)); + metrics.repairTurnMyTurnDueToPriority = Metrics.counter(String.format("repairTurnMyTurnDueToPriority-%s", repairType)); + } + + private void resetConfig() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.repair_type_overrides = defaultConfig.repair_type_overrides; + config.global_settings = defaultConfig.global_settings; + config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; + config.setRepairSubRangeNum(repairType, 1); + } + + private void executeCQL() + { + QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, s) VALUES ('k', 's')"); + QueryProcessor.executeInternal("SELECT s FROM ks.tbl WHERE k='k'"); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) + .getColumnFamilyStore(AutoRepairKeyspace.AUTO_REPAIR_PRIORITY) + .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + } + + + @Test + public void testRepairAsync() + { + AutoRepair.instance.repairExecutors.put(repairType, mockExecutor); + + AutoRepair.instance.repairAsync(repairType, 60); + + verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); + + } + + @Test + public void testRepairTurn() + { + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + } + + @Test + public void testRepair() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + 
AutoRepair.instance.repair(repairType, 0); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + long lastRepairTime = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + //if repair was done then lastRepairTime should be non-zero + Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual value lastRepairTime %d", + lastRepairTime), lastRepairTime > 0); + } + + @Test + public void testTooFrequentRepairs() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + //in the first round let repair run + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType, 0); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + Assert.assertTrue(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), + consideredTables > 0); // compare by value instead of relying on boxed Integer identity + + //if repair was done in last 24 hours then it should not trigger another repair + config.setRepairMinInterval(repairType, "24h"); + AutoRepair.instance.repair(repairType, 0); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertEquals(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + Assert.assertEquals("Expected total repaired tables = 0, actual value: " + consideredTables, + 0, consideredTables); // argument order is (message, expected, actual) + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + 
assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + } + + @Test + public void testNonFrequentRepairs() + { + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + long prevCount = state.getTotalMVTablesConsideredForRepair(); + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType, 0); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", + lastRepairTime1), lastRepairTime1 > 0); + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + Assert.assertTrue("Expected my turn for the repair", + AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + AutoRepair.instance.repair(repairType, 0); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertTrue(String.format("Expected repair time to be different, actual value lastRepairTime1 %d, " + + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1 != lastRepairTime2); // value comparison: assertNotSame on autoboxed longs only checks object identity and cannot fail here + assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + } + + @Test + public void testGetPriorityHosts() + { + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + long prevCount = state.getTotalMVTablesConsideredForRepair(); + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + 
Assert.assertSame(String.format("Priority host count is not same, actual value %d, expected value %d", + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != + NOT_MY_TURN); + AutoRepair.instance.repair(repairType, 0); + AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); + AutoRepair.instance.repair(repairType, 0); + Assert.assertSame(String.format("Priority host count is not same actual value %d, expected value %d", + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + } + + @Test + public void testCheckAutoRepairStartStop() throws Throwable + { + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + long prevCount = state.getTotalMVTablesConsideredForRepair(); + config.setRepairMinInterval(repairType, "0s"); + config.setAutoRepairEnabled(repairType, false); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + AutoRepair.instance.repair(repairType, 0); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + //Since repair has not happened, both the last repair times should be same + Assert.assertEquals(String.format("Expected lastRepairTime1 %d, and lastRepairTime2 %d to be same", + 
lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + + config.setAutoRepairEnabled(repairType, true); + AutoRepair.instance.repair(repairType, 0); + //since repair is done now, so lastRepairTime1/lastRepairTime2 and lastRepairTime3 should not be same + long lastRepairTime3 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertTrue(String.format("Expected lastRepairTime1 %d, and lastRepairTime3 %d to be not same", + lastRepairTime1, lastRepairTime3), lastRepairTime1 != lastRepairTime3); // value comparison: assertNotSame on autoboxed longs only checks object identity + assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + } + + @Test + public void testRepairPrimaryRangesByDefault() + { + Assert.assertTrue("Expected primary range repair only", + AutoRepairService.instance.getAutoRepairConfig().getRepairPrimaryTokenRangeOnly(repairType)); + } + + @Test + public void testGetAllMVs() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMVRepairEnabled(repairType, false); + assertFalse(config.getMVRepairEnabled(repairType)); + assertEquals(0, AutoRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); + + config.setMVRepairEnabled(repairType, true); + + assertTrue(config.getMVRepairEnabled(repairType)); + assertEquals(Arrays.asList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + config.setMVRepairEnabled(repairType, false); + } + + + @Test + public void testMVRepair() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMVRepairEnabled(repairType, true); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + AutoRepair.instance.repair(repairType, 0); + assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, 
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + + config.setMVRepairEnabled(repairType, false); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + AutoRepair.instance.repair(repairType, 0); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + + config.setMVRepairEnabled(repairType, true); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + AutoRepair.instance.repair(repairType, 0); + assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + + config.setMVRepairEnabled(repairType, false); + } + + @Test + public void testSkipRepairSSTableCountHigherThreshold() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + ColumnFamilyStore cfsBaseTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE); + ColumnFamilyStore cfsMVTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV); + Set<SSTableReader> preBaseTable = cfsBaseTable.getLiveSSTables(); + Set<SSTableReader> preMVTable = cfsMVTable.getLiveSSTables(); // snapshot the view's sstables, not the base table's + + for (int i = 0; i < 10; i++) + { + QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (k, i, v) VALUES('k1', %d, 'v1')", KEYSPACE, TABLE, i)); + cfsBaseTable.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + cfsMVTable.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + } + + Set<SSTableReader> postBaseTable = cfsBaseTable.getLiveSSTables(); + Set<SSTableReader> diffBaseTable = new HashSet<>(postBaseTable); + diffBaseTable.removeAll(preBaseTable); + assert diffBaseTable.size() == 10; + + Set<SSTableReader> postMVTable = 
cfsMVTable.getLiveSSTables(); // must read the view here, otherwise the check below only re-tests the base table + Set<SSTableReader> diffMVTable = new HashSet<>(postMVTable); + diffMVTable.removeAll(preMVTable); + assert diffMVTable.size() == 10; + + int beforeCount = config.getRepairSSTableCountHigherThreshold(repairType); + config.setMVRepairEnabled(repairType, true); + config.setRepairSSTableCountHigherThreshold(repairType, 9); + assertEquals(0, state.getRepairSkippedTablesCount()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + state.setLastRepairTime(0); + AutoRepair.instance.repair(repairType, 0); + assertEquals(1, state.getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + // skipping one time for the base table and another time for MV table + assertEquals(2, state.getRepairSkippedTablesCount()); + assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + + // set it to higher value, and this time, the tables should not be skipped + config.setRepairSSTableCountHigherThreshold(repairType, 11); // NOTE(review): immediately overridden by the next line, so the effective threshold is beforeCount + config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); + state.setLastRepairTime(0); + AutoRepair.instance.repair(repairType, 0); + assertEquals(1, state.getTotalMVTablesConsideredForRepair()); + assertEquals(0, state.getRepairSkippedTablesCount()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + + config.setMVRepairEnabled(repairType, false); + config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); + } + + @Test + public void testGetRepairState() + { + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getRepairKeyspaceCount()); + + AutoRepairState state = AutoRepair.instance.getRepairState(repairType); + 
state.setRepairKeyspaceCount(100); + + assertEquals(100L, AutoRepair.instance.getRepairState(repairType).getRepairKeyspaceCount()); + } + + @Test + public void testMetrics() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMVRepairEnabled(repairType, true); + config.setRepairMinInterval(repairType, "0s"); + config.setAutoRepairTableMaxRepairTime(repairType, "0s"); + AutoRepair.timeFunc = () -> { + timeFuncCalls++; + return timeFuncCalls * 1000L; + }; + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); + + AutoRepair.instance.repair(repairType, 0); + + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue() > 0); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); + + config.setAutoRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE-1) + 's'); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + .thenReturn(repairRunnable); + when(autoRepairState.getRepairFailedTablesCount()).thenReturn(10); + when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); + + AutoRepair.instance.repair(repairType, 0); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTablesCount.getValue() > 0); + 
assertTrue(AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); + } + + @Test + public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws Exception + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMVRepairEnabled(repairType, false); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + .thenReturn(repairRunnable); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + when(autoRepairState.getLastRepairTime()).thenReturn((long) 0); + AtomicInteger resetWaitConditionCalls = new AtomicInteger(); + AtomicInteger waitForRepairCompletedCalls = new AtomicInteger(); + doAnswer(invocation -> { + resetWaitConditionCalls.getAndIncrement(); + assertEquals("waitForRepairToComplete was called before resetWaitCondition", + resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get() + 1); + return null; + }).when(autoRepairState).resetWaitCondition(); + doAnswer(invocation -> { + waitForRepairCompletedCalls.getAndIncrement(); + assertEquals("resetWaitCondition was not called before waitForRepairToComplete", + resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get()); + return null; + }).when(autoRepairState).waitForRepairToComplete(); + + AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType, 0); + } + + @Test + public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() + { + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + 
Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(repairType, "0s"); + int disabledTablesRepairCountBefore = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + AutoRepair.instance.repair(repairType, 0); + int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + Assert.assertTrue(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), + consideredTables > 0); // compare by value instead of relying on boxed Integer identity + int disabledTablesRepairCountAfter = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + Assert.assertTrue(String.format("A table %s should be skipped from auto repair, expected value: %d, actual value %d ", TABLE_DISABLED_AUTO_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), + disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); + } + + @Test + public void testTokenRangesNoSplit() + { + Collection<Range<Token>> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + assertEquals(1, tokens.size()); + List<Range<Token>> expectedToken = new ArrayList<>(); + expectedToken.addAll(tokens); + + List<Pair<Token, Token>> ranges = new DefaultAutoRepairTokenSplitter().getRange(repairType, true, KEYSPACE, TABLE); // NOTE(review): element type restored from the file's Pair/Token imports - confirm against the splitter interface + assertEquals(1, ranges.size()); + assertEquals(expectedToken.get(0).left, ranges.get(0).left); + assertEquals(expectedToken.get(0).right, ranges.get(0).right); + } + + @Test + public void testTokenRangesSplit() + { + Collection<Range<Token>> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + assertEquals(1, tokens.size()); + List<Range<Token>> expectedToken = new ArrayList<>(); + expectedToken.addAll(tokens); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairSubRangeNum(repairType, 4); + List<Pair<Token, Token>> ranges = new 
DefaultAutoRepairTokenSplitter().getRange(repairType, true, KEYSPACE, TABLE); + assertEquals(4, ranges.size()); + } + + @Test + public void testTableAttribute() + { + assertTrue(TableAttributes.validKeywords().contains("automated_repair_full")); + assertTrue(TableAttributes.validKeywords().contains("automated_repair_incremental")); + } + + @Test + public void testDefaultAutomatedRepair() + { + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java new file mode 100644 index 000000000000..a0e5bdc45294 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.Test; + +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +public class AutoRepairStateFactoryTest +{ + @Test + public void testGetRepairState() { + AutoRepairState state = RepairType.getAutoRepairState(RepairType.full); + + assert state instanceof FullRepairState; + + state = RepairType.getAutoRepairState(RepairType.incremental); + + assert state instanceof IncrementalRepairState; + } + + @Test + public void testGetRepairStateSupportsAllRepairTypes() { + for (RepairType repairType : RepairType.values()) { + try { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + assertNotNull(state); + } catch (IllegalArgumentException e) { + assertNull(e); + } + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java new file mode 100644 index 000000000000..972b6339954f --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -0,0 +1,382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Arrays; +import java.util.Collection; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.concurrent.Condition; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.mockito.Mock; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.initMocks; + +@RunWith(Parameterized.class) +public class AutoRepairStateTest extends CQLTester +{ + private static final String testTable = "test"; + + @Parameterized.Parameter + public RepairType repairType; + + @Mock + ProgressEvent progressEvent; + + @Parameterized.Parameters + public static Collection repairTypes() + { + return 
Arrays.asList(RepairType.values()); + } + + @Before + public void setUp() { + initMocks(this); + createTable(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int)", KEYSPACE, testTable)); + } + + @Test + public void testGetRepairRunnable() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + AutoRepairService.setup(); + + Runnable runnable = state.getRepairRunnable(KEYSPACE, ImmutableList.of(testTable), ImmutableSet.of(), false); + + assertNotNull(runnable); + } + + @Test + public void testProgressError() throws InterruptedException + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); + + state.progress("test", progressEvent); + + assertFalse(state.success); + assertTrue(state.condition.await(0, TimeUnit.MILLISECONDS)); + } + + @Test + public void testProgress_progress() throws InterruptedException + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + when(progressEvent.getType()).thenReturn(ProgressEventType.PROGRESS); + + state.progress("test", progressEvent); + + assertTrue(state.success); + assertFalse(state.condition.await(0, TimeUnit.MILLISECONDS)); + } + + + @Test + public void testProgress_complete() throws InterruptedException + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + when(progressEvent.getType()).thenReturn(ProgressEventType.COMPLETE); + + state.progress("test", progressEvent); + + assertTrue(state.success); + assertTrue(state.condition.await(1, TimeUnit.MILLISECONDS)); + } + + @Test + public void testWaitForRepairToComplete() throws Exception + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.condition.signalAll(); + Condition finishedCondition = Condition.newOneTimeCondition(); + Callable waitForRepairToComplete = () -> { + state.waitForRepairToComplete(); + finishedCondition.signalAll(); + return null; + }; + + 
waitForRepairToComplete.call(); + + assertTrue(finishedCondition.await(1, TimeUnit.MILLISECONDS)); + } + + @Test + public void testGetLastRepairTime() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.lastRepairTimeInMs = 1; + + assertEquals(1, state.getLastRepairTime()); + } + + @Test + public void testSetTotalTablesConsideredForRepair() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setTotalTablesConsideredForRepair(1); + + assertEquals(1, state.totalTablesConsideredForRepair); + } + + @Test + public void testGetTotalTablesConsideredForRepair() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.totalTablesConsideredForRepair = 1; + + assertEquals(1, state.getTotalTablesConsideredForRepair()); + } + + @Test + public void testSetLastRepairTimeInMs() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setLastRepairTime(1); + + assertEquals(1, state.lastRepairTimeInMs); + } + + @Test + public void testGetClusterRepairTimeInSec() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.clusterRepairTimeInSec = 1; + + assertEquals(1, state.getClusterRepairTimeInSec()); + } + + @Test + public void testGetNodeRepairTimeInSec() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.nodeRepairTimeInSec = 1; + + assertEquals(1, state.getNodeRepairTimeInSec()); + } + + @Test + public void testSetRepairInProgress() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setRepairInProgress(true); + + assertTrue(state.repairInProgress); + } + + @Test + public void testIsRepairInProgress() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairInProgress = true; + + assertTrue(state.isRepairInProgress()); + } + + @Test + public void testSetRepairSkippedTablesCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + 
state.setRepairSkippedTablesCount(1); + + assertEquals(1, state.repairTableSkipCount); + } + + @Test + public void testGetRepairSkippedTablesCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairTableSkipCount = 1; + + assertEquals(1, state.getRepairSkippedTablesCount()); + } + + @Test + public void testGetLongestUnrepairedSecNull() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.longestUnrepairedNode = null; + + try + { + assertEquals(0, state.getLongestUnrepairedSec()); + } catch (Exception e) { + assertNull(e); + } + } + + @Test + public void testGetLongestUnrepairedSec() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.longestUnrepairedNode = new AutoRepairHistory(UUID.randomUUID(), "", 0, 1000, + null, 0, false); + AutoRepairState.timeFunc = () -> 2000L; + + try + { + assertEquals(1, state.getLongestUnrepairedSec()); + } catch (Exception e) { + assertNull(e); + } + } + + @Test + public void testSetRepairFailedTablesCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setRepairFailedTablesCount(1); + + assertEquals(1, state.repairTableFailureCount); + } + + @Test + public void testGetRepairFailedTablesCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairTableFailureCount = 1; + + assertEquals(1, state.getRepairFailedTablesCount()); + } + + @Test + public void testSetTotalMVTablesConsideredForRepair() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setTotalMVTablesConsideredForRepair(1); + + assertEquals(1, state.totalMVTablesConsideredForRepair); + } + + @Test + public void testGetTotalMVTablesConsideredForRepair() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.totalMVTablesConsideredForRepair = 1; + + assertEquals(1, state.getTotalMVTablesConsideredForRepair()); + } + + @Test + public void testSetNodeRepairTimeInSec() { + 
AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setNodeRepairTimeInSec(1); + + assertEquals(1, state.nodeRepairTimeInSec); + } + + @Test + public void testSetClusterRepairTimeInSec() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setClusterRepairTimeInSec(1); + + assertEquals(1, state.clusterRepairTimeInSec); + } + + @Test + public void testSetRepairKeyspaceCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setRepairKeyspaceCount(1); + + assertEquals(1, state.repairKeyspaceCount); + } + + @Test + public void testSetRepairTableSuccessCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setRepairTableSuccessCount(1); + + assertEquals(1, state.repairTableSuccessCount); + } + + @Test + public void testGetRepairKeyspaceCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairKeyspaceCount = 1; + + assertEquals(1, state.getRepairKeyspaceCount()); + } + + @Test + public void testGetRepairTableSuccessCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairTableSuccessCount = 1; + + assertEquals(1, state.getRepairTableSuccessCount()); + } + + @Test + public void testSetLongestUnrepairedNode() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + + state.setLongestUnrepairedNode(history); + + assertEquals(history, state.longestUnrepairedNode); + } + + @Test + public void isSuccess() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.success = true; + + assertTrue(state.isSuccess()); + + state.success = false; + + assertFalse(state.isSuccess()); + } + + @Test + public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesError() throws InterruptedException + { + AutoRepairState state = 
RepairType.getAutoRepairState(repairType); + when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); + + state.progress("test", progressEvent); + assertFalse(state.success); + + state.waitForRepairToComplete(); + assertFalse(state.success); + } + + @Test + public void testResetWaitCondition() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.condition.signalAll(); + assertTrue(state.condition.isSignalled()); + + state.resetWaitCondition(); + + assertFalse(state.condition.isSignalled()); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java new file mode 100644 index 000000000000..b16e0ea5b491 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.BeforeClass; +import org.junit.Test; + +import junit.framework.Assert; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.schema.SchemaTestUtil; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; + +public class AutoRepairTest extends CQLTester +{ + @BeforeClass + public static void setupClass() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + } + + @Test + public void testSetup() + { + AutoRepair instance = new AutoRepair(); + + instance.setup(); + + assertEquals(RepairType.values().length, instance.repairExecutors.size()); + for (RepairType repairType : instance.repairExecutors.keySet()) + { + assertEquals(String.format("Expected 1 task in queue for %s", repairType), + 1, instance.repairExecutors.get(repairType).getPendingTaskCount() + + instance.repairExecutors.get(repairType).getActiveTaskCount()); + } + } + + @Test(expected = ConfigurationException.class) + public void testSetupFailsWhenIREnabledWithCDC() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.setCDCEnabled(true); + + AutoRepair instance = new AutoRepair(); + instance.setup(); + } + + @Test(expected = ConfigurationException.class) + public void testSetupFailsWhenIREnabledWithMV() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + 
DatabaseDescriptor.setMaterializedViewsEnabled(true); + + AutoRepair instance = new AutoRepair(); + instance.setup(); + } + + @Test + public void testCheckNTSreplicationNodeInsideOutsideDC() + { + String ksname1 = "ks_nts1"; + String ksname2 = "ks_nts2"; + Map configOptions1 = new HashMap<>(); + configOptions1.put("datacenter1", "3"); + configOptions1.put(ReplicationParams.CLASS, "NetworkTopologyStrategy"); + SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create(ksname1, KeyspaceParams.create(false, configOptions1)), false); + Map configOptions2 = new HashMap<>(); + configOptions2.put("datacenter2", "3"); + configOptions2.put(ReplicationParams.CLASS, "NetworkTopologyStrategy"); + SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create(ksname2, KeyspaceParams.create(false, configOptions2)), false); + + for (Keyspace ks : Keyspace.all()) + { + if (ks.getName().equals(ksname1)) + { + // case 1 : + // node reside in "datacenter1" + // keyspace has replica in "datacenter1" + Assert.assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + } + else if (ks.getName().equals(ksname2)) + { + // case 2 : + // node reside in "datacenter1" + // keyspace has replica in "datacenter2" + Assert.assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + } + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java new file mode 100644 index 000000000000..b474697cfb5f --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -0,0 +1,479 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UUIDType; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.locator.IEndpointSnitch; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Locator; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.CurrentRepairStatus; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import 
org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.utils.FBUtilities; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_DELETE_HOSTS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_FORCE_REPAIR; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_FINISH_TS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_START_TS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_TURN; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +public class AutoRepairUtilsTest extends CQLTester +{ + static RepairType repairType = RepairType.incremental; + static UUID hostId; + + static InetAddressAndPort localEndpoint; + + @Mock + static Locator snitchMock; + + static Locator defaultSnitch; + + + @BeforeClass + public static void setupClass() throws Exception + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + setAutoRepairEnabled(true); + requireNetwork(); + defaultSnitch = DatabaseDescriptor.getLocator(); + localEndpoint = FBUtilities.getBroadcastAddressAndPort(); + hostId = StorageService.instance.getHostIdForEndpoint(localEndpoint); + AutoRepairUtils.setup(); + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.create(false, + ImmutableMap.of("class", "NetworkTopologyStrategy", "datacenter1", "1")), + TableMetadata.builder("ks", "tbl") + 
.addPartitionKeyColumn("k", UTF8Type.instance) + .build()); + } + + @Before + public void setup() + { + MockitoAnnotations.initMocks(this); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY)); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY)); + } + + @Test + public void testSetForceRepair() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + assertTrue(result.one().getBoolean(COL_FORCE_REPAIR)); + } + + @Test + public void testSetForceRepairNewNode() + { + AutoRepairUtils.setForceRepairNewNode(repairType); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + assertTrue(result.one().getBoolean(COL_FORCE_REPAIR)); + } + + + @Test + public void testClearDeleteHosts() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, delete_hosts, delete_hosts_update_time) VALUES ('%s', %s, { %s }, toTimestamp(now()))", + 
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId, hostId)); + + AutoRepairUtils.clearDeleteHosts(repairType, hostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT delete_hosts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + Set deleteHosts = result.one().getSet(COL_DELETE_HOSTS, UUIDType.instance); + assertNull(deleteHosts); + } + + @Test + public void testGetAutoRepairHistoryForLocalGroup() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + List history = AutoRepairUtils.getAutoRepairHistoryForLocalGroup(repairType); + + assertNotNull(history); + assertEquals(1, history.size()); + assertEquals(hostId, history.get(0).hostId); + } + + @Test + public void testGetAutoRepairHistoryForLocalGroup_empty_history() + { + List history = AutoRepairUtils.getAutoRepairHistoryForLocalGroup(repairType); + + assertNull(history); + } + + @Test + public void testGetCurrentRepairStatus() + { + UUID forceRepair = UUID.randomUUID(); + UUID regularRepair = UUID.randomUUID(); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, force_repair, repair_start_ts) VALUES ('%s', %s, true, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), forceRepair)); 
+ QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts) VALUES ('%s', %s, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), regularRepair)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString(), regularRepair)); + + CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType); + + assertNotNull(status); + assertEquals(1, status.historiesWithoutOnGoingRepair.size()); + assertEquals(hostId, status.historiesWithoutOnGoingRepair.get(0).hostId); + assertEquals(1, status.hostIdsWithOnGoingRepair.size()); + assertTrue(status.hostIdsWithOnGoingRepair.contains(regularRepair)); + assertEquals(1, status.hostIdsWithOnGoingForceRepair.size()); + assertTrue(status.hostIdsWithOnGoingForceRepair.contains(forceRepair)); + assertEquals(1, status.priority.size()); + assertTrue(status.priority.contains(regularRepair)); + } + + @Test + public void testGetHostIdsInCurrentRing() + { + TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType); + + assertNotNull(hosts); + assertEquals(1, hosts.size()); + assertTrue(hosts.contains(hostId)); + } + + @Test + public void testGetHostIdsInCurrentRing_multiple_nodes() + { + InetAddressAndPort ignoredEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); + InetAddressAndPort deadEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 2); + DatabaseDescriptor.getAutoRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); + //DatabaseDescriptor.setEndpointSnitch(snitchMock); + when(snitchMock.location(localEndpoint)).thenReturn(new Location("dc1", "rac1")); + when(snitchMock.location(ignoredEndpoint)).thenReturn(new Location("dc2", "rac1")); + 
when(snitchMock.location(deadEndpoint)).thenReturn(new Location("dc1", "rac1")); + + TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new NodeAddresses(deadEndpoint))); + + assertNotNull(hosts); + assertEquals(1, hosts.size()); + assertTrue(hosts.contains(hostId)); + } + + @Test + public void testGetHostWithLongestUnrepairTime() + { + UUID otherHostId = UUID.randomUUID(); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, repair_finish_ts) VALUES ('%s', %s, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), otherHostId)); + + AutoRepairHistory history = AutoRepairUtils.getHostWithLongestUnrepairTime(repairType); + + assertEquals(hostId, history.hostId); + } + + @Test + public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() + { + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCountInGroup(repairType, 2); + + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepairInGroup(repairType, 0); + + assertEquals(2, count); + } + + + @Test + public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() + { + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCountInGroup(repairType, 2); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairPercentageInGroup(repairType, 50); + + + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepairInGroup(repairType, 10); + + assertEquals(5, count); + } + + @Test + public void testDeleteAutoRepairHistory() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + 
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.deleteAutoRepairHistory(repairType, hostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(0, result.size()); + } + + @Test + public void testUpdateStartAutoRepairHistory() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.updateStartAutoRepairHistory(repairType, hostId, 123, AutoRepairUtils.RepairTurn.MY_TURN); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT repair_start_ts, repair_turn FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + UntypedResultSet.Row row = result.one(); + assertEquals(123, row.getLong(COL_REPAIR_START_TS, 0)); + assertEquals(AutoRepairUtils.RepairTurn.MY_TURN.toString(), row.getString(COL_REPAIR_TURN)); + } + + @Test + public void testUpdateFinishAutoRepairHistory() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, hostId, 123); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT repair_finish_ts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + 
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + assertEquals(123, result.one().getLong(COL_REPAIR_FINISH_TS, 0)); + } + + @Test + public void testAddHostIdToDeleteHosts() + { + UUID otherHostId = UUID.randomUUID(); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), otherHostId)); + + AutoRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), otherHostId)); + assertNotNull(result); + assertEquals(1, result.size()); + Set deleteHosts = result.one().getSet(COL_DELETE_HOSTS, UUIDType.instance); + assertNotNull(deleteHosts); + assertEquals(1, deleteHosts.size()); + assertTrue(deleteHosts.contains(hostId)); + } + + @Test + public void testAddPriorityHost() + { + AutoRepairUtils.addPriorityHosts(repairType, ImmutableSet.of(localEndpoint)); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString())); + assertNotNull(result); + assertEquals(1, result.size()); + Set repairPriority = result.one().getSet(COL_REPAIR_PRIORITY, UUIDType.instance); + assertNotNull(repairPriority); + assertEquals(1, repairPriority.size()); + assertTrue(repairPriority.contains(hostId)); + } + + @Test + public void testRemovePriorityStatus() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", 
+ SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString(), hostId)); + + AutoRepairUtils.removePriorityStatus(repairType, hostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString())); + assertNotNull(result); + assertEquals(1, result.size()); + Set repairPriority = result.one().getSet(COL_REPAIR_PRIORITY, UUIDType.instance); + assertNull(repairPriority); + } + + @Test + public void testGetPriorityHosts() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString(), hostId)); + + Set hosts = AutoRepairUtils.getPriorityHosts(repairType); + + assertNotNull(hosts); + assertEquals(1, hosts.size()); + assertTrue(hosts.contains(localEndpoint)); + } + + @Test + public void testCheckNodeContainsKeyspaceReplica() + { + Keyspace ks = Keyspace.open("ks"); + + assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + } + + @Test + public void testTableMaxRepairTimeExceeded() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); + + assertTrue(AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, 0)); + } + + @Test + public void testKeyspaceMaxRepairTimeExceeded() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); + + assertTrue(AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, 0, 1)); + } + + @Test + public void testGetLastRepairFinishTime() + { + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + + assertEquals(0, history.getLastRepairFinishTime()); + + history.lastRepairFinishTime = 100; + + assertEquals(100, 
history.getLastRepairFinishTime()); + } + + @Test + public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() + { + UUID myID = UUID.randomUUID(); + UUID otherID = UUID.randomUUID(); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCountInGroup(repairType, 5); + long currentMillis = System.currentTimeMillis(); + // finish time less than start time means that repair is ongoing + AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + // finish time is larger than start time means that repair for other node is finished + AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + + assertEquals(AutoRepairUtils.RepairTurn.MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myID)); + } +} diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index c59163ae0bc3..f5c3f03087d2 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -36,6 +36,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; + +import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.concurrent.Condition; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -72,6 +76,7 @@ import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.Refs; +import org.mockito.Mock; import static org.apache.cassandra.ServerTestUtils.*; import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; @@ -81,12 +86,15 @@ import static org.apache.cassandra.repair.messages.RepairOption.INCREMENTAL_KEY; import static 
org.apache.cassandra.repair.messages.RepairOption.RANGES_KEY; import static org.apache.cassandra.service.ActiveRepairService.UNREPAIRED_SSTABLE; +import static org.apache.cassandra.service.ActiveRepairService.instance; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.initMocks; public class ActiveRepairServiceTest { @@ -98,6 +106,8 @@ public class ActiveRepairServiceTest public String cfname; public ColumnFamilyStore store; public static InetAddressAndPort LOCAL, REMOTE; + @Mock + public DiskUsageMonitor diskUsageMonitor; @BeforeClass public static void defineSchema() throws ConfigurationException, UnknownHostException @@ -122,6 +132,7 @@ public void prepare() throws Exception NodeId remote = Register.register(new NodeAddresses(REMOTE)); UnsafeJoin.unsafeJoin(local, Collections.singleton(DatabaseDescriptor.getPartitioner().getRandomToken())); UnsafeJoin.unsafeJoin(remote, Collections.singleton(DatabaseDescriptor.getPartitioner().getMinimumToken())); + initMocks(this); } @Test @@ -489,7 +500,9 @@ public void testRepairSessionSpaceInMiB() activeRepairService.setRepairSessionSpaceInMiB(0); fail("Should have received an IllegalArgumentException for depth of 0"); } - catch (IllegalArgumentException ignored) { } + catch (IllegalArgumentException ignored) + { + } Assert.assertEquals(10, activeRepairService.getRepairSessionSpaceInMiB()); } @@ -499,6 +512,40 @@ public void testRepairSessionSpaceInMiB() } } + public void testVerifyDiskHeadroomThresholdFullRepair() + { + Assert.assertTrue(ActiveRepairService.verifyDiskHeadroomThreshold(TimeUUID.maxAtUnixMillis(0), PreviewKind.NONE, false)); + } + + @Test + public 
void testVerifyDiskHeadroomThresholdDiskFull() + { + DiskUsageMonitor.instance = diskUsageMonitor; + when(diskUsageMonitor.getDiskUsage()).thenReturn(1.0); + DatabaseDescriptor.setIncrementalRepairDiskHeadroomRejectRatio(1.0); + + Assert.assertFalse(ActiveRepairService.verifyDiskHeadroomThreshold(TimeUUID.maxAtUnixMillis(0), PreviewKind.NONE, true)); + } + + @Test + public void testVerifyDiskHeadroomThresholdSufficientDisk() + { + DiskUsageMonitor.instance = diskUsageMonitor; + when(diskUsageMonitor.getDiskUsage()).thenReturn(0.0); + DatabaseDescriptor.setIncrementalRepairDiskHeadroomRejectRatio(0.0); + + Assert.assertTrue(ActiveRepairService.verifyDiskHeadroomThreshold(TimeUUID.maxAtUnixMillis(0), PreviewKind.NONE, true)); + } + + @Test(expected = RuntimeException.class) + public void testPrepareForRepairThrowsExceptionForInsufficientDisk() + { + DiskUsageMonitor.instance = diskUsageMonitor; + when(diskUsageMonitor.getDiskUsage()).thenReturn(1.5); + + instance().prepareForRepair(TimeUUID.maxAtUnixMillis(0), null, null, opts(INCREMENTAL_KEY, b2s(true)), false, null); + } + private static class Task implements Runnable { private final Condition blocked; From 9c02ce9472168977bf6c5413fd91b26728d6a66d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 23 Jul 2024 14:28:07 -0700 Subject: [PATCH 002/257] cr --- .../cassandra/repair/autorepair/AutoRepair.java | 15 ++++++++++----- .../repair/autorepair/AutoRepairState.java | 3 ++- .../repair/autorepair/AutoRepairUtils.java | 15 ++++++++------- .../apache/cassandra/schema/AutoRepairParams.java | 11 +++++++---- .../org/apache/cassandra/schema/TableParams.java | 4 ++-- .../autorepair/AutoRepairParameterizedTest.java | 3 +-- .../repair/autorepair/AutoRepairTest.java | 3 +-- 7 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index f6a08e73b16d..1750d4c698df 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -26,7 +26,6 @@ import java.util.Map; import java.util.Set; import java.util.UUID; -import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; @@ -40,7 +39,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.concurrent.NamedThreadFactory; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; @@ -54,6 +52,7 @@ import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.utils.concurrent.Future; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; @@ -74,6 +73,9 @@ public class AutoRepair @VisibleForTesting protected final Map repairExecutors; + + protected final Map repairRunnableExecutors; + @VisibleForTesting protected final Map repairStates; @@ -85,10 +87,12 @@ protected AutoRepair() { AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType, Thread.NORM_PRIORITY)); repairStates.put(repairType, 
AutoRepairConfig.RepairType.getAutoRepairState(repairType)); tokenRangeSplitters.put(repairType, FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); } @@ -298,7 +302,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : ImmutableList.of(tableName), ranges, primaryRangeOnly); repairState.resetWaitCondition(); - new Thread(NamedThreadFactory.createAnonymousThread(new FutureTask<>(task, null))).start(); + Future f = repairRunnableExecutors.get(repairType).submit(task); try { repairState.waitForRepairToComplete(); @@ -318,10 +322,11 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) else { repairSuccess = false; + boolean cancellationStatus = f.cancel(true); //in future we can add retry, etc. logger.info("Repair failed for range {}-{} for {}.{} total subranges: {}," + - "processed subranges: {}", childStartToken, childEndToken, - keyspaceName, config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges); + "processed subranges: {}, cancellationStatus: {}", childStartToken, childEndToken, + keyspaceName, config.getRepairByKeyspace(repairType) ? 
tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges, cancellationStatus); } ranges.clear(); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index cb3282c6cc60..94afac119b24 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -50,6 +50,7 @@ import java.util.function.Supplier; import java.util.stream.Collectors; +import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; // AutoRepairState represents the state of automated repair for a given repair type. @@ -115,7 +116,7 @@ protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption opti public void progress(String tag, ProgressEvent event) { ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(System.currentTimeMillis()), event.getMessage()); + String message = String.format("[%s] %s", format.format(currentTimeMillis()), event.getMessage()); if (type == ProgressEventType.ERROR) { logger.error("Repair failure for {} repair: {}", repairType.toString(), message); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index ea052acf1354..85c89fe661eb 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -76,6 +76,7 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static 
org.apache.cassandra.utils.Clock.Global.currentTimeMillis; /** * This class serves as a utility class for AutoRepair. It contains various helper APIs @@ -339,7 +340,7 @@ public static void setForceRepairNewNode(RepairType repairType) // this function will be called when a node bootstrap finished UUID hostId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); // insert the data first - insertNewRepairHistory(repairType, System.currentTimeMillis(), System.currentTimeMillis()); + insertNewRepairHistory(repairType, currentTimeMillis(), currentTimeMillis()); setForceRepair(repairType, hostId); } @@ -478,7 +479,7 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); if (nodeHistory.deleteHosts.size() > 0 && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( - System.currentTimeMillis() - nodeHistory.deleteHostsUpdateTime + currentTimeMillis() - nodeHistory.deleteHostsUpdateTime )) { clearDeleteHosts(repairType, nodeHistory.hostId); @@ -509,7 +510,7 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) if (!autoRepairHistoryIds.contains(hostId)) { logger.info("{} for repair type {} doesn't exist in the auto repair history table, insert a new record.", repairType, hostId); - insertNewRepairHistory(repairType, hostId, System.currentTimeMillis(), System.currentTimeMillis()); + insertNewRepairHistory(repairType, hostId, currentTimeMillis(), currentTimeMillis()); } } @@ -652,7 +653,7 @@ public static void insertNewRepairHistory(RepairType repairType, UUID hostId, lo ByteBufferUtil.bytes(hostId), ByteBufferUtil.bytes(startTime), ByteBufferUtil.bytes(finishTime), - ByteBufferUtil.bytes(System.currentTimeMillis()) + ByteBufferUtil.bytes(currentTimeMillis()) ), false, -1, null, cl, ProtocolVersion.CURRENT, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME), 
Dispatcher.RequestTime.forImmediateExecution()); resultSet = UntypedResultSet.create(resultMessage.result); @@ -684,7 +685,7 @@ public static void addHostIdToDeleteHosts(RepairType repairType, UUID myID, UUID addHostIDToDeleteHostsStatement.execute(QueryState.forInternalCalls(), QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(serializer.serialize(new HashSet<>(Arrays.asList(myID))), - ByteBufferUtil.bytes(System.currentTimeMillis()), + ByteBufferUtil.bytes(currentTimeMillis()), ByteBufferUtil.bytes(repairType.toString()), ByteBufferUtil.bytes(hostToBeDeleted) )), Dispatcher.RequestTime.forImmediateExecution()); @@ -782,14 +783,14 @@ public static boolean checkNodeContainsKeyspaceReplica(Keyspace ks) public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long startTime) { long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds - (System.currentTimeMillis() - startTime); + (currentTimeMillis() - startTime); return AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() < tableRepairTimeSoFar; } public static boolean keyspaceMaxRepairTimeExceeded(RepairType repairType, long startTime, int numOfTablesToBeRepaired) { - long keyspaceRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds((System.currentTimeMillis() - startTime)); + long keyspaceRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds((currentTimeMillis() - startTime)); return (long) AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() * numOfTablesToBeRepaired < keyspaceRepairTimeSoFar; } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index 50f482caf1b7..ae97e562635f 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -63,13 +63,16 @@ public static AutoRepairParams create(AutoRepairConfig.RepairType 
repairType, Ma { optionsMap.put(entry.getKey(), new HashMap<>(entry.getValue())); } - for (Map.Entry entry : options.entrySet()) + if (options != null) { - if (!Option.ENABLED.toString().equals(entry.getKey().toLowerCase())) + for (Map.Entry entry : options.entrySet()) { - throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); + if (!Option.ENABLED.toString().equals(entry.getKey().toLowerCase())) + { + throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); + } + optionsMap.get(repairType).put(entry.getKey(), entry.getValue()); } - optionsMap.get(repairType).put(entry.getKey(), entry.getValue()); } AutoRepairParams repairParams = new AutoRepairParams(repairType); repairParams.options = optionsMap; diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index ee512841869e..c73a914de12c 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -19,7 +19,7 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.HashMap; +import java.util.EnumMap; import java.util.Map; import java.util.Map.Entry; @@ -168,7 +168,7 @@ private TableParams(Builder builder) transactionalMigrationFrom = builder.transactionalMigrationFrom; pendingDrop = builder.pendingDrop; checkNotNull(transactionalMigrationFrom); - automatedRepair = new HashMap<>() + automatedRepair = new EnumMap(AutoRepairConfig.RepairType.class) { { put(AutoRepairConfig.RepairType.full, builder.automatedRepairFull); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 0329454fb381..9257266654a8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ 
-39,13 +39,13 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Pair; +import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import junit.framework.Assert; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; @@ -71,7 +71,6 @@ import org.mockito.MockitoAnnotations; import static org.apache.cassandra.Util.setAutoRepairEnabled; -import static org.apache.cassandra.Util.token; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index b16e0ea5b491..c1837aa6a2cc 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -24,8 +24,7 @@ import org.junit.BeforeClass; import org.junit.Test; -import junit.framework.Assert; - +import org.junit.Assert; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.Keyspace; From 5512397126c63cbd01f259d364ae8b93e34ec68d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sat, 27 Jul 2024 09:43:23 -0700 Subject: [PATCH 003/257] chop the repair API --- .../repair/autorepair/AutoRepair.java | 150 ++++++++++-------- 1 file changed, 83 insertions(+), 67 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1750d4c698df..1225434d0106 
100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -140,10 +140,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) logger.debug("Auto-repair is disabled for repair type {}", repairType); return; } - - AutoRepairState repairState = repairStates.get(repairType); - try { String localDC = DatabaseDescriptor.getLocalDataCenter(); @@ -169,19 +166,9 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) && turn != MY_TURN_FORCE_REPAIR; repairState.setTotalTablesConsideredForRepair(0); - if (repairState.getLastRepairTime() != 0) + if (tooSoonToRunRepair(repairType, repairState, config)) { - /** check if it is too soon to run repair. one of the reason we - * should not run frequent repair is because repair triggers - * memtable flush - */ - long timeElapsedSinceLastRepair = TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - repairState.getLastRepairTime()); - if (timeElapsedSinceLastRepair < config.getRepairMinInterval(repairType).toSeconds()) - { - logger.info("Too soon to run repair, last repair was done {} seconds ago", - timeElapsedSinceLastRepair); - return; - } + return; } long startTime = timeFunc.get(); @@ -197,34 +184,16 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) repairState.setTotalMVTablesConsideredForRepair(0); for (Keyspace keyspace : Keyspace.all()) { - Tables tables = keyspace.getMetadata().tables; - Iterator iter = tables.iterator(); - String keyspaceName = keyspace.getName(); if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) { continue; } repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); - List tablesToBeRepaired = new ArrayList<>(); - while (iter.hasNext()) - { - 
repairState.setTotalTablesConsideredForRepair(repairState.getTotalTablesConsideredForRepair() + 1); - TableMetadata tableMetadata = iter.next(); - String tableName = tableMetadata.name; - tablesToBeRepaired.add(tableName); - - // See if we should repair MVs as well that are associated with this given table - List mvs = AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); - if (!mvs.isEmpty()) - { - tablesToBeRepaired.addAll(mvs); - repairState.setTotalMVTablesConsideredForRepair(repairState.getTotalMVTablesConsideredForRepair() + mvs.size()); - } - } - + List tablesToBeRepaired = retrieveTablesToBeRepaired(keyspace, repairType, repairState); for (String tableName : tablesToBeRepaired) { + String keyspaceName = keyspace.getName(); try { ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); @@ -356,38 +325,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } } } - - //if it was due to priority then remove it now - if (turn == MY_TURN_DUE_TO_PRIORITY) - { - logger.info("Remove current host from priority list"); - AutoRepairUtils.removePriorityStatus(repairType, myId); - } - - repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); - long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); - logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + - "repairTableSuccessCount {}, repairTableFailureCount {}, " + - "repairTableSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), - repairState.getRepairTableSuccessCount(), repairState.getRepairFailedTablesCount(), - repairState.getRepairSkippedTablesCount()); - if (repairState.getLastRepairTime() != 0) - { - repairState.setClusterRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - - repairState.getLastRepairTime())); - logger.info("Cluster repair time for repair type {}: {} day(s)", repairType, - 
TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec())); - } - repairState.setLastRepairTime(timeFunc.get()); - if (timeInHours == 0 && millisToWait > 0) - { - //If repair finished quickly, happens for an empty instance, in such case - //wait for a minute so that the JMX metrics can detect the repairInProgress - logger.info("Wait for {} milliseconds for repair type {}.", millisToWait, repairType); - Thread.sleep(millisToWait); - } - repairState.setRepairInProgress(false); - AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, millisToWait); } else { @@ -400,6 +338,84 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } } + private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config) + { + if (repairState.getLastRepairTime() != 0) + { + /** check if it is too soon to run repair. one of the reason we + * should not run frequent repair is that repair triggers + * memtable flush + */ + long timeElapsedSinceLastRepair = TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - repairState.getLastRepairTime()); + if (timeElapsedSinceLastRepair < config.getRepairMinInterval(repairType).toSeconds()) + { + logger.info("Too soon to run repair, last repair was done {} seconds ago", + timeElapsedSinceLastRepair); + return true; + } + } + return false; + } + + private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig.RepairType repairType, AutoRepairState repairState) + { + Tables tables = keyspace.getMetadata().tables; + List tablesToBeRepaired = new ArrayList<>(); + Iterator iter = tables.iterator(); + while (iter.hasNext()) + { + repairState.setTotalTablesConsideredForRepair(repairState.getTotalTablesConsideredForRepair() + 1); + TableMetadata tableMetadata = iter.next(); + String tableName = tableMetadata.name; + tablesToBeRepaired.add(tableName); + + // See if 
we should repair MVs as well that are associated with this given table + List mvs = AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); + if (!mvs.isEmpty()) + { + tablesToBeRepaired.addAll(mvs); + repairState.setTotalMVTablesConsideredForRepair(repairState.getTotalMVTablesConsideredForRepair() + mvs.size()); + } + } + return tablesToBeRepaired; + } + + private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, + long startTime, long millisToWait) throws InterruptedException + { + //if it was due to priority then remove it now + if (turn == MY_TURN_DUE_TO_PRIORITY) + { + logger.info("Remove current host from priority list"); + AutoRepairUtils.removePriorityStatus(repairType, myId); + } + + repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); + long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); + logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + + "repairTableSuccessCount {}, repairTableFailureCount {}, " + + "repairTableSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), + repairState.getRepairTableSuccessCount(), repairState.getRepairFailedTablesCount(), + repairState.getRepairSkippedTablesCount()); + if (repairState.getLastRepairTime() != 0) + { + repairState.setClusterRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - + repairState.getLastRepairTime())); + logger.info("Cluster repair time for repair type {}: {} day(s)", repairType, + TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec())); + } + repairState.setLastRepairTime(timeFunc.get()); + if (timeInHours == 0 && millisToWait > 0) + { + //If repair finished quickly, happens for an empty instance, in such case + //wait for a minute so that the JMX metrics can detect the repairInProgress + logger.info("Wait for {} milliseconds for repair type {}.", 
millisToWait, repairType); + Thread.sleep(millisToWait); + } + repairState.setRepairInProgress(false); + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); + } + public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) { return repairStates.get(repairType); From bdebdfd84f51ff509ca9e2287f211c441883a931 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 16 Aug 2024 14:54:06 -0700 Subject: [PATCH 004/257] Add a test case that ensures LocalStrategy.class is ignored from being repaired --- .../cassandra/repair/autorepair/AutoRepairUtilsTest.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index b474697cfb5f..5507fa676cec 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -64,6 +64,7 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_START_TS; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_TURN; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -81,7 +82,6 @@ public class AutoRepairUtilsTest extends CQLTester static Locator defaultSnitch; - @BeforeClass public static void setupClass() throws Exception { @@ -476,4 +476,11 @@ public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() assertEquals(AutoRepairUtils.RepairTurn.MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myID)); } + + @Test + public void testLocalStrategyAndNetworkKeyspace() + { + assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); + 
assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); + } } From 70d3ed9cbb95579b2bf3016d372dd7cfe0e9c35b Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 16 Aug 2024 15:14:20 -0700 Subject: [PATCH 005/257] Shuffle keyspaces and tables when running auto-repair --- .../repair/autorepair/AutoRepair.java | 14 +++++++++- .../AutoRepairParameterizedTest.java | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1225434d0106..f2436292c481 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -27,6 +27,7 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.function.Supplier; import com.google.common.annotations.VisibleForTesting; @@ -79,6 +80,9 @@ public class AutoRepair @VisibleForTesting protected final Map repairStates; + @VisibleForTesting + protected static Consumer> shuffleFunc = java.util.Collections::shuffle; + protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); @@ -182,7 +186,14 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) repairState.setRepairSkippedTablesCount(0); repairState.setRepairInProgress(true); repairState.setTotalMVTablesConsideredForRepair(0); - for (Keyspace keyspace : Keyspace.all()) + + List keyspaces = new ArrayList<>(); + Keyspace.all().forEach(keyspaces::add); + // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair + // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. 
+ shuffleFunc.accept(keyspaces); + + for (Keyspace keyspace : keyspaces) { if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) { @@ -191,6 +202,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); List tablesToBeRepaired = retrieveTablesToBeRepaired(keyspace, repairType, repairState); + shuffleFunc.accept(tablesToBeRepaired); for (String tableName : tablesToBeRepaired) { String keyspaceName = keyspace.getName(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 9257266654a8..5e0b4ae73528 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -183,6 +183,8 @@ public void setup() AutoRepair.timeFunc = System::currentTimeMillis; resetCounters(); resetConfig(); + + AutoRepair.shuffleFunc = java.util.Collections::shuffle; } @@ -579,4 +581,28 @@ public void testDefaultAutomatedRepair() Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); } + + @Test + public void testRepairShufflesKeyspacesAndTables() + { + AtomicInteger shuffleKeyspacesCall = new AtomicInteger(); + AtomicInteger shuffleTablesCall = new AtomicInteger(); + AutoRepair.shuffleFunc = (List list) -> { + assertTrue(list.get(0) instanceof Keyspace || list.get(0) instanceof String); + if (list.get(0) instanceof Keyspace) + { + shuffleKeyspacesCall.getAndIncrement(); + assertFalse(list.isEmpty()); + } + else if (list.get(0) instanceof String) + { + shuffleTablesCall.getAndIncrement(); + } + }; + + AutoRepair.instance.repair(repairType, 0); + + 
assertEquals(1, shuffleKeyspacesCall.get()); + assertEquals(4, shuffleTablesCall.get()); + } } From b19dc9129c6443959dc3ec25d70e9389d55b9b18 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Sat, 17 Aug 2024 10:06:52 -0700 Subject: [PATCH 006/257] Prevent node restart from scheduling a premature auto-repair cycle --- .../repair/autorepair/AutoRepair.java | 33 ++++++------ .../repair/autorepair/AutoRepairUtils.java | 28 ++++++++++ .../AutoRepairParameterizedTest.java | 51 ++++++++++++------- .../autorepair/AutoRepairUtilsTest.java | 20 ++++++++ 4 files changed, 100 insertions(+), 32 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index f2436292c481..c8da85c48a3e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -169,8 +169,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) // When doing force repair, we want to repair without -pr. 
boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) && turn != MY_TURN_FORCE_REPAIR; - repairState.setTotalTablesConsideredForRepair(0); - if (tooSoonToRunRepair(repairType, repairState, config)) + if (tooSoonToRunRepair(repairType, repairState, config, myId)) { return; } @@ -185,6 +184,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) repairState.setRepairFailedTablesCount(0); repairState.setRepairSkippedTablesCount(0); repairState.setRepairInProgress(true); + repairState.setTotalTablesConsideredForRepair(0); repairState.setTotalMVTablesConsideredForRepair(0); List keyspaces = new ArrayList<>(); @@ -350,21 +350,24 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } } - private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config) + private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config, UUID myId) { - if (repairState.getLastRepairTime() != 0) + if (repairState.getLastRepairTime() == 0) { - /** check if it is too soon to run repair. one of the reason we - * should not run frequent repair is that repair triggers - * memtable flush - */ - long timeElapsedSinceLastRepair = TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - repairState.getLastRepairTime()); - if (timeElapsedSinceLastRepair < config.getRepairMinInterval(repairType).toSeconds()) - { - logger.info("Too soon to run repair, last repair was done {} seconds ago", - timeElapsedSinceLastRepair); - return true; - } + // the node has either just booted or has not run repair before, + // we should check for the node's repair history in the DB + repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType, myId)); + } + /** check if it is too soon to run repair.
one of the reasons we + * should not run frequent repair is that repair triggers + * memtable flush + */ + long timeElapsedSinceLastRepair = TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - repairState.getLastRepairTime()); + if (timeElapsedSinceLastRepair < config.getRepairMinInterval(repairType).toSeconds()) + { + logger.info("Too soon to run repair, last repair was done {} seconds ago", + timeElapsedSinceLastRepair); + return true; } return false; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 85c89fe661eb..a041ed84e95e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -144,10 +144,15 @@ public class AutoRepairUtils , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); + final static String SELECT_LAST_REPAIR_TIME_FOR_NODE = String.format( + "SELECT %s FROM %s.%s WHERE %s = ?
AND %s = ?", COL_REPAIR_FINISH_TS, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); + static ModificationStatement delStatementRepairHistory; static SelectStatement selectStatementRepairHistory; static ModificationStatement delStatementPriorityStatus; static SelectStatement selectStatementRepairPriority; + static SelectStatement selectLastRepairTimeForNode; static ModificationStatement addPriorityHost; static ModificationStatement insertNewRepairHistoryStatement; static ModificationStatement recordStartRepairHistoryStatement; @@ -171,6 +176,10 @@ public static void setup() .forInternalCalls()); selectStatementRepairPriority = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_PRIORITY, ClientState .forInternalCalls()); + + selectLastRepairTimeForNode = (SelectStatement) QueryProcessor.getStatement(SELECT_LAST_REPAIR_TIME_FOR_NODE, ClientState + .forInternalCalls()); + delStatementPriorityStatus = (ModificationStatement) QueryProcessor.getStatement(DEL_REPAIR_PRIORITY, ClientState .forInternalCalls()); addPriorityHost = (ModificationStatement) QueryProcessor.getStatement(ADD_PRIORITY_HOST, ClientState @@ -371,6 +380,25 @@ public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) return getCurrentRepairStatus(repairType, autoRepairHistories); } + public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) + { + ResultMessage.Rows rows = selectLastRepairTimeForNode.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList( + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), + Dispatcher.RequestTime.forImmediateExecution()); + + UntypedResultSet repairTime = UntypedResultSet.create(rows.result); + + if (repairTime.isEmpty()) + { + return 0; + } + + return repairTime.one().getLong(COL_REPAIR_FINISH_TS); + } + public static CurrentRepairStatus getCurrentRepairStatus(RepairType 
repairType, List autoRepairHistories) { if (autoRepairHistories != null) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 5e0b4ae73528..812e62f8f55c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -151,14 +151,8 @@ public static void setupClass() throws Exception QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); - defaultConfig = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(false); DatabaseDescriptor.setCDCEnabled(false); - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) - { - defaultConfig.setAutoRepairEnabled(repairType, true); - defaultConfig.setMVRepairEnabled(repairType, false); - } } @Before @@ -199,6 +193,15 @@ private void resetCounters() private void resetConfig() { + // prepare a fresh default config + defaultConfig = new AutoRepairConfig(true); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + defaultConfig.setAutoRepairEnabled(repairType, true); + defaultConfig.setMVRepairEnabled(repairType, false); + } + + // reset the AutoRepairService config to default AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.repair_type_overrides = defaultConfig.repair_type_overrides; config.global_settings = defaultConfig.global_settings; @@ -265,9 +268,6 @@ public void testTooFrequentRepairs() long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertEquals(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + "lastRepairTime2 %d", 
lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); - consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); - Assert.assertEquals("Expected total repaired tables = 0, actual value: " + consideredTables, - consideredTables, 0); assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); } @@ -369,24 +369,23 @@ public void testMVRepair() { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, true); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); AutoRepair.instance.repair(repairType, 0); assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, false); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); AutoRepair.instance.repair(repairType, 0); assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, true); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); AutoRepair.instance.repair(repairType, 0); assertEquals(1, 
AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - - config.setMVRepairEnabled(repairType, false); } @Test @@ -398,6 +397,7 @@ public void testSkipRepairSSTableCountHigherThreshold() ColumnFamilyStore cfsMVTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV); Set preBaseTable = cfsBaseTable.getLiveSSTables(); Set preMVTable = cfsBaseTable.getLiveSSTables(); + config.setRepairMinInterval(repairType, "0s"); for (int i = 0; i < 10; i++) { @@ -438,9 +438,6 @@ public void testSkipRepairSSTableCountHigherThreshold() assertEquals(0, state.getRepairSkippedTablesCount()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); - - config.setMVRepairEnabled(repairType, false); - config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); } @Test @@ -600,9 +597,29 @@ else if (list.get(0) instanceof String) } }; + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repair(repairType, 0); assertEquals(1, shuffleKeyspacesCall.get()); assertEquals(4, shuffleTablesCall.get()); } + + @Test + public void testRepairTakesLastRepairTimeFromDB() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMVRepairEnabled(repairType, true); + long lastRepairTime = System.currentTimeMillis() - 1000; + AutoRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + config.setRepairMinInterval(repairType, "1h"); + + AutoRepair.instance.repair(repairType, 0); + + // repair scheduler should not attempt to run repair as last repair time in DB is 
current time - 1s + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); + // repair scheduler should load the repair time from the DB + assertEquals(lastRepairTime, AutoRepair.instance.repairStates.get(repairType).getLastRepairTime()); + } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 5507fa676cec..e529be786bb5 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -483,4 +483,24 @@ public void testLocalStrategyAndNetworkKeyspace() assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); } + + @Test + public void testGetLastRepairTimeForNode() + { + UUID myID = UUID.randomUUID(); + UUID otherID = UUID.randomUUID(); + long currentMillis = System.currentTimeMillis(); + AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + + assertEquals(currentMillis - 100, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); + } + + @Test + public void testGetLastRepairTimeForNodeWhenHistoryIsEmpty() + { + UUID myID = UUID.randomUUID(); + + assertEquals(0, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); + } } From 727542d3c955d72e71b0bfbec27194cbd074ce6a Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Sat, 17 Aug 2024 10:26:17 -0700 Subject: [PATCH 007/257] Implement SucceededTokenRangesCount and FailedTokenRangesCount metrics for auto-repair --- .../cassandra/metrics/AutoRepairMetrics.java | 15 ++++- .../repair/autorepair/AutoRepair.java | 28 ++++----- .../repair/autorepair/AutoRepairState.java | 46 +++++++-------- 
.../AutoRepairParameterizedTest.java | 6 +- .../autorepair/AutoRepairStateTest.java | 59 +++++++++---------- 5 files changed, 79 insertions(+), 75 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index cd46c462fb66..426034ffcd4b 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -36,7 +36,8 @@ public class AutoRepairMetrics public Gauge clusterRepairTimeInSec; public Gauge skippedTablesCount; public Gauge longestUnrepairedSec; - public Gauge failedTablesCount; + public Gauge succeededTokenRangesCount; + public Gauge failedTokenRangesCount; public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; @@ -87,11 +88,19 @@ public Integer getValue() } }); - failedTablesCount = Metrics.register(factory.createMetricName("FailedTablesCount"), new Gauge() + succeededTokenRangesCount = Metrics.register(factory.createMetricName("SucceededTokenRangesCount"), new Gauge() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getRepairFailedTablesCount(); + return AutoRepair.instance.getRepairState(repairType).getSucceededTokenRangesCount(); + } + }); + + failedTokenRangesCount = Metrics.register(factory.createMetricName("FailedTokenRangesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getFailedTokenRangesCount(); } }); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index c8da85c48a3e..41161d01c57e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -180,13 +180,14 @@ public void repair(AutoRepairConfig.RepairType repairType, long 
millisToWait) AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); repairState.setRepairKeyspaceCount(0); - repairState.setRepairTableSuccessCount(0); - repairState.setRepairFailedTablesCount(0); repairState.setRepairSkippedTablesCount(0); repairState.setRepairInProgress(true); repairState.setTotalTablesConsideredForRepair(0); repairState.setTotalMVTablesConsideredForRepair(0); + int failedTokenRanges = 0; + int succeededTokenRanges = 0; + List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair @@ -235,7 +236,6 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) logger.info("Repair table {}.{}", keyspaceName, tableName); } long tableStartTime = timeFunc.get(); - boolean repairSuccess = true; Set> ranges = new HashSet<>(); List> subRangesToBeRepaired = tokenRangeSplitters.get(repairType).getRange(repairType, primaryRangeOnly, keyspaceName, tableName); int totalSubRanges = subRangesToBeRepaired.size(); @@ -299,28 +299,20 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) logger.info("Repair completed for range {}-{} for {}.{}, total subranges: {}," + "processed subranges: {}", childStartToken, childEndToken, keyspaceName, config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges); + succeededTokenRanges += ranges.size(); } else { - repairSuccess = false; boolean cancellationStatus = f.cancel(true); //in future we can add retry, etc. logger.info("Repair failed for range {}-{} for {}.{} total subranges: {}," + "processed subranges: {}, cancellationStatus: {}", childStartToken, childEndToken, keyspaceName, config.getRepairByKeyspace(repairType) ? 
tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges, cancellationStatus); + failedTokenRanges += ranges.size(); } ranges.clear(); } } - int touchedTables = config.getRepairByKeyspace(repairType) ? tablesToBeRepaired.size() : 1; - if (repairSuccess) - { - repairState.setRepairTableSuccessCount(repairState.getRepairTableSuccessCount() + touchedTables); - } - else - { - repairState.setRepairFailedTablesCount(repairState.getRepairFailedTablesCount() + touchedTables); - } if (config.getRepairByKeyspace(repairType)) { logger.info("Repair completed for keyspace {}, tables: {}", keyspaceName, tablesToBeRepaired); @@ -337,7 +329,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } } } - cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, millisToWait); + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, millisToWait, failedTokenRanges, succeededTokenRanges); } else { @@ -396,7 +388,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon } private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, - long startTime, long millisToWait) throws InterruptedException + long startTime, long millisToWait, int failedTokenRanges, int succeededTokenRanges) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) @@ -405,12 +397,14 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType AutoRepairUtils.removePriorityStatus(repairType, myId); } + repairState.setFailedTokenRangesCount(failedTokenRanges); + repairState.setSucceededTokenRangesCount(succeededTokenRanges); repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount 
{}, " + - "repairTableSuccessCount {}, repairTableFailureCount {}, " + + "repairTokenRangesSuccessCount {}, repairTokenRangesFailureCount {}, " + "repairTableSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), - repairState.getRepairTableSuccessCount(), repairState.getRepairFailedTablesCount(), + repairState.getSucceededTokenRangesCount(), repairState.getFailedTokenRangesCount(), repairState.getRepairSkippedTablesCount()); if (repairState.getLastRepairTime() != 0) { diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 94afac119b24..fc1538e21268 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -76,16 +76,16 @@ public abstract class AutoRepairState implements ProgressListener @VisibleForTesting protected int repairKeyspaceCount = 0; @VisibleForTesting - protected int repairTableSuccessCount = 0; - @VisibleForTesting protected int repairTableSkipCount = 0; @VisibleForTesting - protected int repairTableFailureCount = 0; - @VisibleForTesting protected int totalMVTablesConsideredForRepair = 0; - @VisibleForTesting protected int totalDisabledTablesRepairCount = 0; + + @VisibleForTesting + protected int failedTokenRangesCount = 0; + @VisibleForTesting + protected int succeededTokenRangesCount = 0; @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; @VisibleForTesting @@ -203,16 +203,6 @@ public int getLongestUnrepairedSec() return (int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - longestUnrepairedNode.getLastRepairFinishTime()); } - public void setRepairFailedTablesCount(int count) - { - repairTableFailureCount = count; - } - - public int getRepairFailedTablesCount() - { - return repairTableFailureCount; - } - public void setTotalMVTablesConsideredForRepair(int count) { totalMVTablesConsideredForRepair = count; 
@@ -238,24 +228,34 @@ public void setRepairKeyspaceCount(int count) repairKeyspaceCount = count; } - public void setRepairTableSuccessCount(int count) + public int getRepairKeyspaceCount() + { + return repairKeyspaceCount; + } + + public void setLongestUnrepairedNode(AutoRepairHistory longestUnrepairedNode) { - repairTableSuccessCount = count; + this.longestUnrepairedNode = longestUnrepairedNode; } - public int getRepairKeyspaceCount() + public void setFailedTokenRangesCount(int count) { - return repairKeyspaceCount; + failedTokenRangesCount = count; } - public int getRepairTableSuccessCount() + public int getFailedTokenRangesCount() { - return repairTableSuccessCount; + return failedTokenRangesCount; } - public void setLongestUnrepairedNode(AutoRepairHistory longestUnrepairedNode) + public void setSucceededTokenRangesCount(int count) { - this.longestUnrepairedNode = longestUnrepairedNode; + succeededTokenRangesCount = count; + } + + public int getSucceededTokenRangesCount() + { + return succeededTokenRangesCount; } public boolean isSuccess() diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 812e62f8f55c..2d94d7ebda0c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -477,12 +477,14 @@ public void testMetrics() AutoRepair.instance.repairStates.put(repairType, autoRepairState); when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); - when(autoRepairState.getRepairFailedTablesCount()).thenReturn(10); + when(autoRepairState.getFailedTokenRangesCount()).thenReturn(10); + when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); 
AutoRepair.instance.repair(repairType, 0); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTablesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 972b6339954f..19aa0a8b439c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -247,23 +247,6 @@ public void testGetLongestUnrepairedSec() { } } - @Test - public void testSetRepairFailedTablesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - - state.setRepairFailedTablesCount(1); - - assertEquals(1, state.repairTableFailureCount); - } - - @Test - public void testGetRepairFailedTablesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.repairTableFailureCount = 1; - - assertEquals(1, state.getRepairFailedTablesCount()); - } - @Test public void testSetTotalMVTablesConsideredForRepair() { AutoRepairState state = RepairType.getAutoRepairState(repairType); @@ -307,40 +290,56 @@ public void testSetRepairKeyspaceCount() { assertEquals(1, state.repairKeyspaceCount); } + @Test + public void testGetRepairKeyspaceCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairKeyspaceCount = 1; + + assertEquals(1, state.getRepairKeyspaceCount()); + } @Test - public void testSetRepairTableSuccessCount() { + public void testSetLongestUnrepairedNode() { 
AutoRepairState state = RepairType.getAutoRepairState(repairType); + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); - state.setRepairTableSuccessCount(1); + state.setLongestUnrepairedNode(history); - assertEquals(1, state.repairTableSuccessCount); + assertEquals(history, state.longestUnrepairedNode); } @Test - public void testGetRepairKeyspaceCount() { + public void testSetSucceededTokenRangesCount() { AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.repairKeyspaceCount = 1; - assertEquals(1, state.getRepairKeyspaceCount()); + state.setSucceededTokenRangesCount(1); + + assertEquals(1, state.succeededTokenRangesCount); } @Test - public void testGetRepairTableSuccessCount() { + public void testGetSucceededTokenRangesCount() { AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.repairTableSuccessCount = 1; + state.succeededTokenRangesCount = 1; - assertEquals(1, state.getRepairTableSuccessCount()); + assertEquals(1, state.getSucceededTokenRangesCount()); } @Test - public void testSetLongestUnrepairedNode() { + public void testSetFailedTokenRangesCount() { AutoRepairState state = RepairType.getAutoRepairState(repairType); - AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); - state.setLongestUnrepairedNode(history); + state.setFailedTokenRangesCount(1); - assertEquals(history, state.longestUnrepairedNode); + assertEquals(1, state.failedTokenRangesCount); + } + + @Test + public void testGetFailedTokenRangesCount() { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.failedTokenRangesCount = 1; + + assertEquals(1, state.getFailedTokenRangesCount()); } @Test From b2dc270ed56cedf00adc85a5715e6762e9de1c9b Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Sat, 17 Aug 2024 10:52:47 -0700 Subject: [PATCH 008/257] Add intial_scheduler_delay option to AutoRepairConfig --- 
.../repair/autorepair/AutoRepair.java | 5 +---- .../repair/autorepair/AutoRepairConfig.java | 15 +++++++++++++++ .../tools/nodetool/GetAutoRepairConfig.java | 1 + .../autorepair/AutoRepairConfigTest.java | 18 ++++++++++++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 41161d01c57e..c7ed1d708155 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -62,9 +62,6 @@ public class AutoRepair { - // Initial delay for repair session to start after setup - final static long INITIAL_REPAIR_DELAY_SEC = 30; - private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); @VisibleForTesting @@ -114,7 +111,7 @@ public void setup() { repairExecutors.get(repairType).scheduleWithFixedDelay( () -> repair(repairType, 60000), - INITIAL_REPAIR_DELAY_SEC, + config.getInitialSchedulerDelay(repairType).toSeconds(), config.getRepairCheckInterval().toSeconds(), TimeUnit.SECONDS); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index bd20c1d3c548..8e1c321dd1c0 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -254,6 +254,17 @@ public String getTokenRangeSplitter(RepairType repairType) return applyOverrides(repairType, opt -> opt.token_range_splitter); } + public void setInitialSchedulerDelay(RepairType repairType, String initialSchedulerDelay) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).intial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); + } + + public DurationSpec.IntSecondsBound getInitialSchedulerDelay(RepairType repairType) + { + return 
applyOverrides(repairType, opt -> opt.intial_scheduler_delay); + } + // Options configures auto-repair behavior for a given repair type. // All fields can be modified dynamically. public static class Options implements Serializable @@ -288,6 +299,7 @@ protected static Options getDefaultOptions() opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = true; opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); + opts.intial_scheduler_delay = new DurationSpec.IntSecondsBound("15m"); // 15 minutes return opts; } @@ -353,6 +365,8 @@ protected static Options getDefaultOptions() // the default is DefaultAutoRepairTokenSplitter.class.getName(). The class should implement IAutoRepairTokenRangeSplitter. // The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' public volatile String token_range_splitter; + // the minimum delay in seconds after a node starts before the scheduler starts running repair + public volatile DurationSpec.IntSecondsBound intial_scheduler_delay; public String toString() { @@ -371,6 +385,7 @@ public String toString() ", table_max_repair_time=" + table_max_repair_time + ", mv_repair_enabled=" + mv_repair_enabled + ", token_range_splitter=" + token_range_splitter + + ", intial_scheduler_delay=" + intial_scheduler_delay + '}'; } } diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 49ccf0f2aa24..feaf4c98ae17 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -71,6 +71,7 @@ private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, Au sb.append("\n\tnumber of parallel repairs within group: " + config.getParallelRepairCountInGroup(repairType)); sb.append("\n\tpercentage of parallel 
repairs within group: " + config.getParallelRepairPercentageInGroup(repairType)); sb.append("\n\tmv repair enabled: " + config.getMVRepairEnabled(repairType)); + sb.append("\n\tinitial scheduler delay: " + config.getInitialSchedulerDelay(repairType)); return sb.toString(); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index dea0c9a827c3..aa3db7835d54 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -450,4 +450,22 @@ public void testInvalidTokenRangeSplitter() assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter("invalid-class").getClass().getName()); } + @Test + public void testSetInitialSchedulerDelay() + { + config.setInitialSchedulerDelay(repairType, "5s"); + + assert config.repair_type_overrides.get(repairType).intial_scheduler_delay.toSeconds() == 5; + } + + @Test + public void testGetInitialSchedulerDelay() + { + config.global_settings.intial_scheduler_delay = new DurationSpec.IntSecondsBound("5s"); + + int result = config.getInitialSchedulerDelay(repairType).toSeconds(); + + assertEquals(5, result); + } + } From dcc53150b2da63438a0b1d44bdbf28812bfa5b5a Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Sun, 18 Aug 2024 10:19:20 -0700 Subject: [PATCH 009/257] Implement nodetool sstablerepairedset --- .../cassandra/service/StorageService.java | 40 +++++- .../service/StorageServiceMBean.java | 6 + .../org/apache/cassandra/tools/NodeProbe.java | 10 ++ .../org/apache/cassandra/tools/NodeTool.java | 1 + .../tools/nodetool/SSTableRepairedSet.java | 112 ++++++++++++++++ .../service/StorageServiceServerTest.java | 124 ++++++++++++++++++ .../cassandra/tools/JMXStandardsTest.java | 2 + .../nodetool/SSTableRepairedSetTest.java | 114 ++++++++++++++++ 8 files changed, 407 
insertions(+), 2 deletions(-) create mode 100644 src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java create mode 100644 test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index bb8a56ba00ee..bce80b11b6d4 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -71,7 +71,8 @@ import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Uninterruptibles; -import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; + +import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -5660,9 +5661,44 @@ public void alterTopology(String changes) AlterTopology transform = new AlterTopology(updates, ClusterMetadataService.instance().placementProvider()); ClusterMetadataService.instance() .commit(transform, - m -> { logger.info("Rack changes committed successfully"); return m; }, + m -> { + logger.info("Rack changes committed successfully"); + return m; + }, (c, r) -> { throw new IllegalArgumentException("Unable to commit rack changes: " + r); }); } + + public List getTablesForKeyspace(String keyspace) { + return Keyspace.open(keyspace).getColumnFamilyStores().stream().map(cfs -> cfs.name).collect(Collectors.toList()); + } + + public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tableNames) throws InvalidRequestException + { + Map tables = Keyspace.open(keyspace).getColumnFamilyStores() + .stream().collect(Collectors.toMap(c -> c.name, c -> c)); + for (String tableName : tableNames) { + if (!tables.containsKey(tableName)) + throw new InvalidRequestException("Table " + tableName + " does not 
exist in keyspace " + keyspace); + } + + // only select SSTables that are unrepaired when repaired is true and vice versa + Predicate predicate = sst -> repaired != sst.isRepaired(); + + // mutate SSTables + long repairedAt = !repaired ? 0 : currentTimeMillis(); + List sstablesTouched = new ArrayList<>(); + for (String tableName : tableNames) { + ColumnFamilyStore table = tables.get(tableName); + Set result = table.runWithCompactionsDisabled(() -> { + Set sstables = table.getLiveSSTables().stream().filter(predicate).collect(Collectors.toSet()); + if (!preview) + table.getCompactionStrategyManager().mutateRepaired(sstables, repairedAt, null, false); + return sstables; + }, predicate, OperationType.ANTICOMPACTION, true, false, true); + sstablesTouched.addAll(result.stream().map(sst -> sst.descriptor.baseFile().name()).collect(Collectors.toList())); + } + return sstablesTouched; + } } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index 61c378a4bddd..c361574deb04 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -37,6 +37,7 @@ import org.apache.cassandra.db.ColumnFamilyStoreMBean; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.BreaksJMX; +import org.apache.cassandra.exceptions.InvalidRequestException; public interface StorageServiceMBean extends NotificationEmitter { @@ -1378,4 +1379,9 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e boolean getPaxosRepairRaceWait(); // Comma delimited list of "nodeId=dc:rack" or "endpoint=dc:rack" void alterTopology(String updates); + /** Gets the names of all tables for the given keyspace */ + public List getTablesForKeyspace(String keyspace); + + /** Mutates the repaired state of all SSTables for the given SSTables */ + public List 
mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tables) throws InvalidRequestException; } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 2cde23e82467..7b1f85cb1f97 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -90,6 +90,7 @@ import org.apache.cassandra.db.compaction.CompactionManagerMBean; import org.apache.cassandra.db.virtual.CIDRFilteringMetricsTable; import org.apache.cassandra.db.virtual.CIDRFilteringMetricsTableMBean; +import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.fql.FullQueryLoggerOptions; import org.apache.cassandra.fql.FullQueryLoggerOptionsCompositeData; import org.apache.cassandra.gms.FailureDetector; @@ -2638,6 +2639,15 @@ public void setMVRepairEnabled(AutoRepairConfig.RepairType repairType, boolean e { autoRepairProxy.setMVRepairEnabled(repairType, enabled); } + + public List mutateSSTableRepairedState(boolean repair, boolean preview, String keyspace, List tables) throws InvalidRequestException + { + return ssProxy.mutateSSTableRepairedState(repair, preview, keyspace, tables); + } + + public List getTablesForKeyspace(String keyspace) { + return ssProxy.getTablesForKeyspace(keyspace); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index ff49a9958de1..ce0fc7ef225e 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -219,6 +219,7 @@ public int execute(String... 
args) SetTraceProbability.class, Sjk.class, Snapshot.class, + SSTableRepairedSet.class, Status.class, StatusAutoCompaction.class, StatusBackup.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java new file mode 100644 index 000000000000..32ee09f93590 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + + +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; + +@Command(name = "sstablerepairedset", description = "Set the repaired state of SSTables for given keyspace/tables") +public class SSTableRepairedSet extends NodeTool.NodeToolCmd +{ + @Arguments(usage = " []", description = "The keyspace optionally followed by one or more tables", required = true) + protected List args = new ArrayList<>(); + + @Option(title = "really-set", + name = { "--really-set" }, + description = "Really set the repaired state of SSTables. If not set, only print SSTables that would be affected.") + protected boolean reallySet = false; + + @Option(title = "is-repaired", + name = { "--is-repaired" }, + description = "Set SSTables to repaired state.") + protected boolean isRepaired = false; + + @Option(title = "is-unrepaired", + name = { "--is-unrepaired" }, + description = "Set SSTables to unrepaired state.") + protected boolean isUnrepaired = false; + + @Override + public void execute(NodeProbe probe) + { + PrintStream out = probe.output().out; + + String message; + if (reallySet) + { + message = "Mutating repaired state of SSTables for"; + } + else + { + message = "Previewing repaired state mutation of SSTables for"; + } + + if (args.isEmpty()) + { + out.println("At least a keyspace name must be provided."); + return; + } + String keyspace = args.get(0); + + List tables; + if (args.size() > 1) + { + tables = args.subList(1, args.size()); + message += " tables " + String.join(", ", tables) + " in"; + } + else + { + tables = probe.getTablesForKeyspace(keyspace); + message += " all tables in"; + } + message += " keyspace " + keyspace; + + if 
(isRepaired == isUnrepaired) + { + out.println("Exactly one of --is-repaired or --is-unrepaired must be provided."); + return; + } + message += " to " + (isRepaired ? "repaired" : "unrepaired"); + out.println(message); + + try + { + List mutatedSSTables = probe.mutateSSTableRepairedState(isRepaired, !reallySet, keyspace, new ArrayList<>(tables)); + if (!reallySet) + out.println("The following SSTables would be mutated:"); + else + out.println("The following SSTables were mutated:"); + for (String sstable : mutatedSSTables) + out.println(sstable); + } + catch (InvalidRequestException e) + { + out.println(e.getMessage()); + } + } +} diff --git a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java index 7814b82ba08a..aaa964caa0f9 100644 --- a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java +++ b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java @@ -31,6 +31,13 @@ import java.util.UUID; import com.google.common.collect.Sets; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; + import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -48,6 +55,8 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.WithPartitioner; import org.apache.cassandra.schema.KeyspaceMetadata; @@ -67,10 +76,17 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.GOSSIP_DISABLE_THREAD_VALIDATION; import static org.junit.Assert.assertEquals; 
+import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + public class StorageServiceServerTest { + public static final String keyspace = "test_keyspace"; + public static ColumnFamilyStore table1; + public static ColumnFamilyStore table2; + static final String DC1 = "DC1"; static final String DC2 = "DC2"; static final String RACK = "rack1"; @@ -95,6 +111,21 @@ public static void setUp() throws ConfigurationException, UnknownHostException id5 = InetAddressAndPort.getByName("127.0.0.5"); registerNodes(); ServerTestUtils.markCMS(); + + SchemaLoader.createKeyspace(keyspace, KeyspaceParams.simple(1), + SchemaLoader.standardCFMD(keyspace, "table1").build(), + SchemaLoader.standardCFMD(keyspace, "table2").build()); + table1 = Keyspace.open(keyspace).getColumnFamilyStore("table1"); + assert table1 != null; + table2 = Keyspace.open(keyspace).getColumnFamilyStore("table2"); + assert table2 != null; + } + + @Before + public void clearData() + { + table1.truncateBlocking(); + table2.truncateBlocking(); } private static void registerNodes() @@ -617,4 +648,97 @@ private static AuditLogOptions getBaseAuditLogOptions() throws IOException return options; } + + @Test + public void testGetTablesForKeyspace() + { + List result = StorageService.instance.getTablesForKeyspace(keyspace); + + assertEquals(Arrays.asList(table1.name, table2.name), result.stream().sorted().collect(Collectors.toList())); + } + + @Test + public void testGetTablesForKeyspaceNotFound() + { + String missingKeyspace = "MISSING_KEYSPACE"; + try + { + StorageService.instance.getTablesForKeyspace(missingKeyspace); + fail("Expected an AssertionError to be thrown"); + } + catch (AssertionError e) + { + assertEquals("Unknown keyspace " + missingKeyspace, e.getMessage()); + } + } + + @Test + public void testMutateSSTableRepairedStateTableNotFound() + { + try + { + StorageService.instance.mutateSSTableRepairedState(true, false, keyspace, 
Arrays.asList("MISSING_TABLE")); + fail("Expected an InvalidRequestException to be thrown"); + } + catch (InvalidRequestException e) + { + // Test passed + } + } + + @Test + public void testMutateSSTableRepairedStateTablePreview() + { + SchemaLoader.insertData(keyspace, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(1, table1.getLiveSSTables().size()); + + List result = StorageService.instance.mutateSSTableRepairedState(true, true, keyspace, Arrays.asList(table1.name)); + + assertEquals(1, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } + + @Test + public void testMutateSSTableRepairedStateTableRepaired() + { + SchemaLoader.insertData(keyspace, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + SchemaLoader.insertData(keyspace, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(2, table1.getLiveSSTables().size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + }); + + List result = StorageService.instance.mutateSSTableRepairedState(true, false, keyspace, Arrays.asList(table1.name)); + + assertEquals(2, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertTrue(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } + + @Test + public void testMutateSSTableRepairedStateTableUnrepaired() throws Exception + { + SchemaLoader.insertData(keyspace, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + SchemaLoader.insertData(keyspace, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + table1.getCompactionStrategyManager().mutateRepaired(table1.getLiveSSTables(), 1, null, false); + assertEquals(2, 
table1.getLiveSSTables().stream().filter(SSTableReader::isRepaired).count()); + + List result = StorageService.instance.mutateSSTableRepairedState(false, false, keyspace, Arrays.asList(table1.name)); + + assertEquals(2, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } } diff --git a/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java b/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java index 87b9ff93fded..62b6b9b5ec16 100644 --- a/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java +++ b/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java @@ -50,6 +50,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.utils.BreaksJMX; import org.assertj.core.api.Assertions; import org.reflections.Reflections; @@ -98,6 +99,7 @@ public class JMXStandardsTest .add(IllegalStateException.class) .add(ClassNotFoundException.class) .add(OpenDataException.class) + .add(InvalidRequestException.class) .build(); /** * This list is a set of types under java.* and javax.*, but are too vague that could cause issues; this does not diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java new file mode 100644 index 000000000000..0a2ead81b778 --- /dev/null +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.Output; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class SSTableRepairedSetTest +{ + @Mock + private NodeProbe probe; + + private SSTableRepairedSet cmd; + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + PrintStream noopStream = new PrintStream(new OutputStream() + { + @Override + public void write(int b) + { + } + }); + when(probe.output()).thenReturn(new Output(noopStream, noopStream)); + cmd = new SSTableRepairedSet(); + } + + @Test + public void testNoKeyspace() + { + cmd.execute(probe); + verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); + } + + @Test + public void testBothRepairedAndUnrepaired() + { + cmd.args = Arrays.asList("keyspace"); + cmd.execute(probe); + verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); + } + + @Test + public void testNeitherRepairedNorUnrepaired() + { + cmd.args = 
Arrays.asList("keyspace"); + cmd.execute(probe); + verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); + } + + @Test + public void testRepairedPreview() + { + cmd.args = Arrays.asList("keyspace"); + cmd.isRepaired = true; + cmd.execute(probe); + verify(probe).mutateSSTableRepairedState(true, true, "keyspace", new ArrayList<>()); + } + + @Test + public void testUnrepairedReallySet() + { + cmd.args = Arrays.asList("keyspace"); + cmd.isUnrepaired = true; + cmd.reallySet = true; + cmd.execute(probe); + verify(probe).mutateSSTableRepairedState(false, false, "keyspace", new ArrayList<>()); + } + + @Test + public void testExecuteWithTableNames() + { + cmd.args = Arrays.asList("keyspace", "table1", "table2"); + cmd.isRepaired = true; + cmd.reallySet = true; + cmd.execute(probe); + verify(probe).mutateSSTableRepairedState(true, false, "keyspace", Arrays.asList("table1", "table2")); + } +} From 6c4e4a0818636d4cbf827981f2d3b969315ced3e Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Sun, 25 Aug 2024 14:07:59 -0700 Subject: [PATCH 010/257] Change SkippedTablesCount metric to SkippedTokenRangesCount --- .../cassandra/metrics/AutoRepairMetrics.java | 7 +++--- .../repair/autorepair/AutoRepair.java | 22 +++++++++-------- .../repair/autorepair/AutoRepairConfig.java | 10 ++++---- .../repair/autorepair/AutoRepairState.java | 24 +++++++++---------- .../autorepair/AutoRepairConfigTest.java | 4 ++-- .../AutoRepairParameterizedTest.java | 19 +++++++-------- .../autorepair/AutoRepairStateTest.java | 12 +++++----- 7 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index 426034ffcd4b..d765899631b7 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -34,10 +34,11 @@ public class AutoRepairMetrics public Gauge 
repairsInProgress; public Gauge nodeRepairTimeInSec; public Gauge clusterRepairTimeInSec; - public Gauge skippedTablesCount; public Gauge longestUnrepairedSec; public Gauge succeededTokenRangesCount; public Gauge failedTokenRangesCount; + public Gauge skippedTokenRangesCount; + public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; @@ -72,11 +73,11 @@ public Integer getValue() } }); - skippedTablesCount = Metrics.register(factory.createMetricName("SkippedTablesCount"), new Gauge() + skippedTokenRangesCount = Metrics.register(factory.createMetricName("SkippedTokenRangesCount"), new Gauge() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getRepairSkippedTablesCount(); + return AutoRepair.instance.getRepairState(repairType).getSkippedTokenRangesCount(); } }); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index c7ed1d708155..937370720a32 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -177,13 +177,13 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); repairState.setRepairKeyspaceCount(0); - repairState.setRepairSkippedTablesCount(0); repairState.setRepairInProgress(true); repairState.setTotalTablesConsideredForRepair(0); repairState.setTotalMVTablesConsideredForRepair(0); int failedTokenRanges = 0; int succeededTokenRanges = 0; + int skippedTokenRanges = 0; List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); @@ -206,6 +206,9 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) String keyspaceName = keyspace.getName(); try { + List> subRangesToBeRepaired = 
tokenRangeSplitters.get(repairType).getRange(repairType, primaryRangeOnly, keyspaceName, tableName); + int totalSubRanges = subRangesToBeRepaired.size(); + ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) { @@ -220,7 +223,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) { logger.info("Too many SSTables for repair, not doing repair on table {}.{} " + "totalSSTables {}", keyspaceName, tableName, columnFamilyStore.getLiveSSTables().size()); - repairState.setRepairSkippedTablesCount(repairState.getRepairSkippedTablesCount() + 1); + skippedTokenRanges += totalSubRanges; continue; } @@ -234,8 +237,6 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } long tableStartTime = timeFunc.get(); Set> ranges = new HashSet<>(); - List> subRangesToBeRepaired = tokenRangeSplitters.get(repairType).getRange(repairType, primaryRangeOnly, keyspaceName, tableName); - int totalSubRanges = subRangesToBeRepaired.size(); int totalProcessedSubRanges = 0; for (Pair token : subRangesToBeRepaired) { @@ -250,7 +251,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) { if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, tableStartTime, tablesToBeRepaired.size())) { - repairState.setRepairSkippedTablesCount(repairState.getRepairSkippedTablesCount() + tablesToBeRepaired.size()); + skippedTokenRanges += totalSubRanges - totalProcessedSubRanges; logger.info("Keyspace took too much time to repair hence skipping it {}", keyspaceName); break; @@ -260,7 +261,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) { if (AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) { - repairState.setRepairSkippedTablesCount(repairState.getRepairSkippedTablesCount() + 1); + skippedTokenRanges += totalSubRanges - totalProcessedSubRanges; 
logger.info("Table took too much time to repair hence skipping it {}.{}", keyspaceName, tableName); break; @@ -326,7 +327,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } } } - cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, millisToWait, failedTokenRanges, succeededTokenRanges); + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, millisToWait, failedTokenRanges, succeededTokenRanges, skippedTokenRanges); } else { @@ -385,7 +386,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon } private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, - long startTime, long millisToWait, int failedTokenRanges, int succeededTokenRanges) throws InterruptedException + long startTime, long millisToWait, int failedTokenRanges, int succeededTokenRanges, int skippedTokenRanges) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) @@ -396,13 +397,14 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairState.setFailedTokenRangesCount(failedTokenRanges); repairState.setSucceededTokenRangesCount(succeededTokenRanges); + repairState.setSkippedTokenRangesCount(skippedTokenRanges); repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + "repairTokenRangesSuccessCount {}, repairTokenRangesFailureCount {}, " + - "repairTableSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), + "repairTokenRangesSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), repairState.getSucceededTokenRangesCount(), repairState.getFailedTokenRangesCount(), - 
repairState.getRepairSkippedTablesCount()); + repairState.getSkippedTokenRangesCount()); if (repairState.getLastRepairTime() != 0) { repairState.setClusterRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 8e1c321dd1c0..5cbb4f9f5bb6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -257,12 +257,12 @@ public String getTokenRangeSplitter(RepairType repairType) public void setInitialSchedulerDelay(RepairType repairType, String initialSchedulerDelay) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).intial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); + repair_type_overrides.get(repairType).initial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); } public DurationSpec.IntSecondsBound getInitialSchedulerDelay(RepairType repairType) { - return applyOverrides(repairType, opt -> opt.intial_scheduler_delay); + return applyOverrides(repairType, opt -> opt.initial_scheduler_delay); } // Options configures auto-repair behavior for a given repair type. 
@@ -299,7 +299,7 @@ protected static Options getDefaultOptions() opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = true; opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); - opts.intial_scheduler_delay = new DurationSpec.IntSecondsBound("15m"); // 15 minutes + opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("15m"); // 15 minutes return opts; } @@ -366,7 +366,7 @@ protected static Options getDefaultOptions() // The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' public volatile String token_range_splitter; // the minimum delay in seconds after a node starts before the scheduler starts running repair - public volatile DurationSpec.IntSecondsBound intial_scheduler_delay; + public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; public String toString() { @@ -385,7 +385,7 @@ public String toString() ", table_max_repair_time=" + table_max_repair_time + ", mv_repair_enabled=" + mv_repair_enabled + ", token_range_splitter=" + token_range_splitter + - ", intial_scheduler_delay=" + intial_scheduler_delay + + ", intial_scheduler_delay=" + initial_scheduler_delay + '}'; } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index fc1538e21268..c734c4bdb122 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -76,8 +76,6 @@ public abstract class AutoRepairState implements ProgressListener @VisibleForTesting protected int repairKeyspaceCount = 0; @VisibleForTesting - protected int repairTableSkipCount = 0; - @VisibleForTesting protected int totalMVTablesConsideredForRepair = 0; @VisibleForTesting protected int totalDisabledTablesRepairCount = 0; @@ -87,6 +85,8 @@ public abstract class 
AutoRepairState implements ProgressListener @VisibleForTesting protected int succeededTokenRangesCount = 0; @VisibleForTesting + protected int skippedTokenRangesCount = 0; + @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; @VisibleForTesting protected Condition condition = newOneTimeCondition(); @@ -184,16 +184,6 @@ public boolean isRepairInProgress() return repairInProgress; } - public void setRepairSkippedTablesCount(int count) - { - repairTableSkipCount = count; - } - - public int getRepairSkippedTablesCount() - { - return repairTableSkipCount; - } - public int getLongestUnrepairedSec() { if (longestUnrepairedNode == null) @@ -258,6 +248,16 @@ public int getSucceededTokenRangesCount() return succeededTokenRangesCount; } + public void setSkippedTokenRangesCount(int count) + { + skippedTokenRangesCount = count; + } + + public int getSkippedTokenRangesCount() + { + return skippedTokenRangesCount; + } + public boolean isSuccess() { return success; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index aa3db7835d54..923e35b73796 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -455,13 +455,13 @@ public void testSetInitialSchedulerDelay() { config.setInitialSchedulerDelay(repairType, "5s"); - assert config.repair_type_overrides.get(repairType).intial_scheduler_delay.toSeconds() == 5; + assert config.repair_type_overrides.get(repairType).initial_scheduler_delay.toSeconds() == 5; } @Test public void testGetInitialSchedulerDelay() { - config.global_settings.intial_scheduler_delay = new DurationSpec.IntSecondsBound("5s"); + config.global_settings.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5s"); int result = config.getInitialSchedulerDelay(repairType).toSeconds(); diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 2d94d7ebda0c..cbc2622c89ec 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -65,7 +65,6 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.progress.ProgressEvent; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; @@ -98,8 +97,6 @@ public class AutoRepairParameterizedTest extends CQLTester @Mock ScheduledExecutorPlus mockExecutor; @Mock - ProgressEvent progressEvent; - @Mock AutoRepairState autoRepairState; @Mock RepairCoordinator repairRunnable; @@ -419,15 +416,15 @@ public void testSkipRepairSSTableCountHigherThreshold() int beforeCount = config.getRepairSSTableCountHigherThreshold(repairType); config.setMVRepairEnabled(repairType, true); config.setRepairSSTableCountHigherThreshold(repairType, 9); - assertEquals(0, state.getRepairSkippedTablesCount()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(0, state.getSkippedTokenRangesCount()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); state.setLastRepairTime(0); AutoRepair.instance.repair(repairType, 0); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); // skipping one time for the base table and another time for MV table - assertEquals(2, state.getRepairSkippedTablesCount()); - assertEquals(2, 
AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(2, state.getSkippedTokenRangesCount()); + assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); // set it to higher value, and this time, the tables should not be skipped config.setRepairSSTableCountHigherThreshold(repairType, 11); @@ -435,9 +432,9 @@ public void testSkipRepairSSTableCountHigherThreshold() state.setLastRepairTime(0); AutoRepair.instance.repair(repairType, 0); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); - assertEquals(0, state.getRepairSkippedTablesCount()); + assertEquals(0, state.getSkippedTokenRangesCount()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); } @Test @@ -470,7 +467,7 @@ public void testMetrics() assertTrue(AutoRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue() > 0); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); config.setAutoRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE-1) + 's'); @@ -482,7 +479,7 @@ public void testMetrics() when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); AutoRepair.instance.repair(repairType, 0); - assertEquals(0, 
AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 19aa0a8b439c..340a21f97bfa 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -203,20 +203,20 @@ public void testIsRepairInProgress() { } @Test - public void testSetRepairSkippedTablesCount() { + public void testSetSkippedTokenRangesCount() { AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.setRepairSkippedTablesCount(1); + state.setSkippedTokenRangesCount(1); - assertEquals(1, state.repairTableSkipCount); + assertEquals(1, state.skippedTokenRangesCount); } @Test - public void testGetRepairSkippedTablesCount() { + public void testGetSkippedTokenRangesCount() { AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.repairTableSkipCount = 1; + state.skippedTokenRangesCount = 1; - assertEquals(1, state.getRepairSkippedTablesCount()); + assertEquals(1, state.getSkippedTokenRangesCount()); } @Test From 7b7d1605dae29fb1279f358d08832d85c2f9e1b7 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 26 Aug 2024 15:12:41 -0700 Subject: [PATCH 011/257] config description --- .../pages/managing/operating/metrics.adoc | 21 +++++----- .../repair/autorepair/AutoRepairConfig.java | 42 +++++++++++-------- 2 files changed, 35 insertions(+), 28 deletions(-) 
diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 35a0026ac1db..4d52f4145c5d 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1088,9 +1088,9 @@ Metrics specifc to automated repair. Reported name format: *Metric Name*:: -`org.apache.cassandra.metrics.AutoRepairMetrics.` +`org.apache.cassandra.metrics.AutoRepair.` *JMX MBean*:: -`org.apache.cassandra.metrics:type=AutoRepairMetrics name=` +`org.apache.cassandra.metrics:type=AutoRepair name= repairType=` [cols=",,",options="header",] |=== @@ -1099,19 +1099,20 @@ Reported name format: on the node |NodeRepairTimeInSec |Gauge |Time taken to repair -the node +the node in seconds |ClusterRepairTimeInSec |Gauge |Time taken to repair -the entire Cassandra cluster - -|SkippedTablesCount |Gauge |Number of tables skipped -on the node +the entire Cassandra cluster in seconds |LongestUnrepairedSec |Gauge |Time since the last repair -ran on the node +ran on the node in seconds + +|SucceededTokenRangesCount |Gauge |Number of token ranges successfully repaired on the node -|FailedTablesCount |Gauge |Number of tables encountered -failure during repair on the node +|FailedTokenRangesCount |Gauge |Number of token ranges failed to repair on the node + +|SkippedTokenRangesCount |Gauge |Number of token ranges skipped +on the node |TotalMVTablesConsideredForRepair |Gauge |Number of materialized views considered on the node diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 5cbb4f9f5bb6..9522d35bcdc8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -37,10 +37,16 @@ public class AutoRepairConfig implements Serializable { // enable/disable 
auto repair globally, overrides all other settings. Cannot be modified dynamically. + // if it is set to false, then no repair will be scheduled, including full and incremental repairs by this framework. + // if it is set to true, then this repair scheduler will consult another config available for each RepairType, and based on that config, it will schedule repairs. public final Boolean enabled; - // the interval in seconds between checks for eligible repair operations. Cannot be modified dynamically. + // the interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if + // none is going, then check if it's time to schedule or wait public final DurationSpec.IntSecondsBound repair_check_interval = new DurationSpec.IntSecondsBound("5m"); - // configures how long repair history is kept for a replaced node + // when any nodes leave the ring then the repair schedule needs to adjust the order, etc. + // the repair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. + // This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. + // So, the duration in this config determines for how long deleted host's information is kept in the scheduler's metadata.
public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); // global_settings overides Options.defaultOptions for all repair types public volatile Options global_settings; @@ -287,7 +293,7 @@ protected static Options getDefaultOptions() opts.enabled = false; opts.repair_by_keyspace = false; - opts.number_of_subranges = 1; + opts.number_of_subranges = 16; opts.number_of_repair_threads = 1; opts.parallel_repair_count_in_group = 1; opts.parallel_repair_percentage_in_group = 0; @@ -316,32 +322,32 @@ protected static Options getDefaultOptions() // If you do not use v-nodes or the number of v-nodes is pretty small, say 8, setting this value to a higher number, say 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. public volatile Integer number_of_subranges; // the number of repair threads to run for a given invoked Repair Job. - // Once the scheduler schedules one Job, then howmany threads to use inside that job will be controlled through this parameter. + // Once the scheduler schedules one repair session, then how many threads to use inside that job will be controlled through this parameter. // This is similar to -j for repair options for the nodetool repair command. public volatile Integer number_of_repair_threads; // the number of repair sessions that can run in parallel in a single group // The number of nodes running repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. The default is 3. // This configuration controls how many nodes would run repair in parallel. - // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. + // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters.
// If one or more node(s) finish repair, then the framework automatically picks up the next candidate and ensures the maximum number of nodes running repair do not exceed “3”. public volatile Integer parallel_repair_count_in_group; // the number of repair sessions that can run in parallel in a single groupas a percentage // of the total number of nodes in the group [0,100] // The percentage of nodes in the cluster that run repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. - //The problem with a fixed number of nodes (the above property) is that in a large-scale environment, + // The problem with a fixed number of nodes (the above property) is that in a large-scale environment, // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. - //The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. + // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers public volatile Integer parallel_repair_percentage_in_group; // the upper threshold of SSTables allowed to participate in a single repair session // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. - // This is to avoid penalizing good neighbors with an outlier. + // This is to avoid penalizing good tables (neighbors) with an outlier. 
public volatile Integer sstable_upper_threshold; - // the minimum time in hours between repairs of the same token range - // The minimum number of hours to run one repair cycle is 24 hours. The default is 24 hours. - // This means that if auto repair finishes one round on one cluster within 24 hours, it won’t start a new round. - // This is applicable for extremely tiny clusters, say 3 nodes. + // the minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finish + // repair quickly. + // The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours, then on a given node it won’t start a new repair round + // until at least 24 hours have passed since the last repair conducted on that node. public volatile DurationSpec.IntSecondsBound min_repair_interval; // specifies a denylist of datacenters to repair // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. @@ -350,22 +356,22 @@ protected static Options getDefaultOptions() // It is the same as -pr in nodetool repair options. public volatile Boolean repair_primary_token_range_only; // configures whether to force immediate repair on new nodes + // default it is set to 'false'; this is useful if you want to repair new nodes immediately after they join the ring. public volatile Boolean force_repair_new_node; - // the maximum time in seconds that a repair session can run for a single table - // Max time for repairing one table, if exceeded, skip the table. The default is 6 * 60 * 60, which is 6 hours. + // the maximum time that a repair session can run for a single table + // Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers.
// Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. - // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator telling them they are violating SLA even if I am a neighbor, and it would require a lot of back-and-forth manual interventions, etc. + // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. - // Please note the repair will still run in parallel on other nodes, this is to address outliers on a given node. public volatile DurationSpec.IntSecondsBound table_max_repair_time; - // the default is 'true'. MVs are mutated at LOCAL_ONE consistency level in Cassandra. + // the default is 'true'. // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. public volatile Boolean mv_repair_enabled; // the default is DefaultAutoRepairTokenSplitter.class.getName(). The class should implement IAutoRepairTokenRangeSplitter. 
// The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' public volatile String token_range_splitter; - // the minimum delay in seconds after a node starts before the scheduler starts running repair + // the minimum delay after a node starts before the scheduler starts running repair public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; public String toString() From a908bb1968157f3620dc5ab4e85a42149f5ccf8a Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Wed, 28 Aug 2024 20:45:16 -0700 Subject: [PATCH 012/257] Implement repair retries for auto-repair scheduler --- .../repair/autorepair/AutoRepair.java | 62 +++- .../repair/autorepair/AutoRepairConfig.java | 25 ++ .../repair/autorepair/AutoRepairUtils.java | 2 +- .../cassandra/service/AutoRepairService.java | 12 + .../service/AutoRepairServiceMBean.java | 2 + .../org/apache/cassandra/tools/NodeProbe.java | 10 + .../tools/nodetool/GetAutoRepairConfig.java | 2 + .../tools/nodetool/SetAutoRepairConfig.java | 31 +- .../AutoRepairParameterizedTest.java | 57 ++++ .../service/AutoRepairServiceTest.java | 203 ++++++++++++ .../nodetool/SetAutoRepairConfigTest.java | 312 ++++++++++++++++++ 11 files changed, 690 insertions(+), 28 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java create mode 100644 test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 937370720a32..2fa1db89c386 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -27,11 +27,13 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import java.util.function.Consumer; import 
java.util.function.Supplier; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.Uninterruptibles; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; @@ -80,6 +82,9 @@ public class AutoRepair @VisibleForTesting protected static Consumer> shuffleFunc = java.util.Collections::shuffle; + @VisibleForTesting + protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; + protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); @@ -277,20 +282,38 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) if ((totalProcessedSubRanges % config.getRepairThreads(repairType) == 0) || (totalProcessedSubRanges == totalSubRanges)) { - RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : ImmutableList.of(tableName), - ranges, primaryRangeOnly); - repairState.resetWaitCondition(); - Future f = repairRunnableExecutors.get(repairType).submit(task); - try + int retryCount = 0; + Future f = null; + while (retryCount <= config.getRepairMaxRetries()) { - repairState.waitForRepairToComplete(); + RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, + config.getRepairByKeyspace(repairType) ? 
tablesToBeRepaired : ImmutableList.of(tableName), + ranges, primaryRangeOnly); + repairState.resetWaitCondition(); + f = repairRunnableExecutors.get(repairType).submit(task); + try + { + repairState.waitForRepairToComplete(); + } + catch (InterruptedException e) + { + logger.error("Exception in cond await:", e); + } + if (repairState.isSuccess()) + { + break; + } + else if (retryCount < config.getRepairMaxRetries()) + { + boolean cancellationStatus = f.cancel(true); + logger.warn("Repair failed for range {}-{} for {}.{} with cancellationStatus: {} retrying after {} seconds...", + childStartToken, childEndToken, + keyspaceName, config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, + cancellationStatus, config.getRepairRetryBackoff().toSeconds()); + sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); + } + retryCount++; } - catch (InterruptedException e) - { - logger.error("Exception in cond await:", e); - } - //check repair status if (repairState.isSuccess()) { @@ -301,11 +324,15 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) } else { - boolean cancellationStatus = f.cancel(true); - //in future we can add retry, etc. - logger.info("Repair failed for range {}-{} for {}.{} total subranges: {}," + - "processed subranges: {}, cancellationStatus: {}", childStartToken, childEndToken, - keyspaceName, config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges, cancellationStatus); + boolean cancellationStatus = true; + if (f != null) + { + cancellationStatus = f.cancel(true); + } + //in the future we can add retry, etc. + logger.error("Repair failed for range {}-{} for {}.{} after {} retries, total subranges: {}," + + "processed subranges: {}, cancellationStatus: {}", childStartToken.toString(), childEndToken.toString(), keyspaceName, + config.getRepairByKeyspace(repairType) ? 
tablesToBeRepaired : tableName, retryCount, totalSubRanges, totalProcessedSubRanges, cancellationStatus); failedTokenRanges += ranges.size(); } ranges.clear(); @@ -395,6 +422,7 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType AutoRepairUtils.removePriorityStatus(repairType, myId); } + logger.info("TEST123"); repairState.setFailedTokenRangesCount(failedTokenRanges); repairState.setSucceededTokenRangesCount(succeededTokenRanges); repairState.setSkippedTokenRangesCount(skippedTokenRanges); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 9522d35bcdc8..e3e7d34658e9 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -48,6 +48,11 @@ public class AutoRepairConfig implements Serializable // This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. // So, the duration in this config determinses for how long deleted host's information is kept in the scheduler's metadata. public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); + // the maximum number of retries for a repair session. + public volatile Integer repair_max_retries = 3; + // the backoff time in seconds for retrying a repair session. 
+ public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("60s"); + // global_settings overides Options.defaultOptions for all repair types public volatile Options global_settings; @@ -108,6 +113,26 @@ public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); } + public int getRepairMaxRetries() + { + return repair_max_retries; + } + + public void setRepairMaxRetries(int maxRetries) + { + repair_max_retries = maxRetries; + } + + public DurationSpec.LongSecondsBound getRepairRetryBackoff() + { + return repair_retry_backoff; + } + + public void setRepairRetryBackoff(String interval) + { + repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); + } + public boolean isAutoRepairEnabled(RepairType repairType) { return enabled && applyOverrides(repairType, opt -> opt.enabled); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index a041ed84e95e..b74157244ce1 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -733,7 +733,7 @@ public static void addPriorityHosts(RepairType repairType, Set 0) + if (!hostIds.isEmpty()) { SetSerializer serializer = SetSerializer.getInstance(UUIDSerializer.instance, UTF8Type.instance.comparatorSet); addPriorityHost.execute(QueryState.forInternalCalls(), diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index b07d770fb2de..60073b20d4b9 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -104,6 +104,18 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) 
config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); } + @Override + public void setAutoRepairMaxRetriesCount(int retries) + { + config.setRepairMaxRetries(retries); + } + + @Override + public void setAutoRepairRetryBackoff(String interval) + { + config.setRepairRetryBackoff(interval); + } + @Override public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) { diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 5dda1b157517..d01c6d61db1d 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -44,6 +44,8 @@ public interface AutoRepairServiceMBean public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); + public void setAutoRepairMaxRetriesCount(int retries); + public void setAutoRepairRetryBackoff(String interval); public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 7b1f85cb1f97..f45c3a40578b 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2607,6 +2607,16 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); } + public void setAutoRepairMaxRetriesCount(int retries) + { + autoRepairProxy.setAutoRepairMaxRetriesCount(retries); + } + + public void setAutoRepairRetryBackoff(String interval) + { + autoRepairProxy.setAutoRepairRetryBackoff(interval); + } + public void setRepairSSTableCountHigherThreshold(AutoRepairConfig.RepairType repairType, 
int ssTableHigherThreshold) { autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index feaf4c98ae17..cfb005542efc 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -47,6 +47,8 @@ public void execute(NodeProbe probe) sb.append("repair scheduler configuration:"); sb.append("\n\trepair eligibility check interval: " + config.getRepairCheckInterval()); sb.append("\n\tTTL for repair history for dead nodes: " + config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); + sb.append("\n\tmax retries for repair: " + config.getRepairMaxRetries()); + sb.append("\n\tretry backoff: " + config.getRepairRetryBackoff()); for (RepairType repairType : RepairType.values()) { sb.append(formatRepairTypeConfig(probe, repairType, config)); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index bd00809dcf22..bf2d290ca54d 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -44,7 +44,8 @@ public class SetAutoRepairConfig extends NodeToolCmd description = "autorepair param and value.\nPossible autorepair parameters are as following: " + "[number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + - "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only|parallel_repair_count_in_group|parallel_repair_percentage_in_group|mv_repair_enabled]", + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + + 
"|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries|repair_retry_backoff]", required = true) protected List args = new ArrayList<>(); @@ -58,7 +59,6 @@ public class SetAutoRepairConfig extends NodeToolCmd @Override public void execute(NodeProbe probe) { - checkArgument(repairType != null, "--repair-type is required."); checkArgument(args.size() == 2, "setautorepairconfig requires param-type, and value args."); String paramType = args.get(0); String paramVal = args.get(1); @@ -69,10 +69,21 @@ public void execute(NodeProbe probe) return; } - if (paramType.equals("history_clear_delete_hosts_buffer_interval")) + // options that do not require --repair-type option + switch (paramType) { - probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); - return; + case "history_clear_delete_hosts_buffer_interval": + probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); + return; + case "repair_max_retries": + probe.setAutoRepairMaxRetriesCount(Integer.parseInt(paramVal)); + return; + case "repair_retry_backoff": + probe.setAutoRepairRetryBackoff(paramVal); + return; + default: + // proceed to options that require --repair-type option + break; } // options below require --repair-type option @@ -99,14 +110,14 @@ public void execute(NodeProbe probe) probe.setAutoRepairTableMaxRepairTime(repairType, paramVal); break; case "priority_hosts": - hosts = validateLocalGroupHosts(probe, repairType, paramVal); + hosts = validateLocalGroupHosts(paramVal); if (!hosts.isEmpty()) { probe.setRepairPriorityForHosts(repairType, hosts); } break; case "forcerepair_hosts": - hosts = validateLocalGroupHosts(probe, repairType, paramVal); + hosts = validateLocalGroupHosts(paramVal); if (!hosts.isEmpty()) { probe.setForceRepairForHosts(repairType, hosts); @@ -123,10 +134,10 @@ public void execute(NodeProbe probe) case "repair_primary_token_range_only": probe.setPrimaryTokenRangeOnly(repairType, Boolean.parseBoolean(paramVal)); break; - case 
"parallel_repair_count_in_group": + case "parallel_repair_count": probe.setParallelRepairCountInGroup(repairType, Integer.parseInt(paramVal)); break; - case "parallel_repair_percentage_in_group": + case "parallel_repair_percentage": probe.setParallelRepairPercentageInGroup(repairType, Integer.parseInt(paramVal)); break; case "mv_repair_enabled": @@ -137,7 +148,7 @@ public void execute(NodeProbe probe) } } - private Set validateLocalGroupHosts(NodeProbe probe, RepairType repairType, String paramVal) { + private Set validateLocalGroupHosts(String paramVal) { Set hosts = new HashSet<>(); for (String host : Splitter.on(',').split(paramVal)) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index cbc2622c89ec..4fb18bf9e0d6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import com.google.common.collect.ImmutableMap; @@ -621,4 +622,60 @@ public void testRepairTakesLastRepairTimeFromDB() // repair scheduler should load the repair time from the DB assertEquals(lastRepairTime, AutoRepair.instance.repairStates.get(repairType).getLastRepairTime()); } + + @Test + public void testRepairMaxRetries() + { + when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.isSuccess()).thenReturn(false); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AtomicInteger sleepCalls = new AtomicInteger(); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + sleepCalls.getAndIncrement(); + assertEquals(TimeUnit.SECONDS, unit); + 
assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); + }; + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + + AutoRepair.instance.repair(repairType, 0); + + //system_auth.role_permissions,system_auth.network_permissions,system_auth.role_members,system_auth.roles, + // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, + // system_distributed.auto_repair_priority,system_distributed.repair_history,system_distributed.auto_repair_history, + // system_distributed.view_build_status,system_distributed.parent_repair_history,system_distributed.partition_denylist + int exptedTablesGoingThroughRepair = 14; + assertEquals(config.getRepairMaxRetries()*exptedTablesGoingThroughRepair, sleepCalls.get()); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(exptedTablesGoingThroughRepair); + } + + @Test + public void testRepairSuccessAfterRetry() + { + when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AtomicInteger sleepCalls = new AtomicInteger(); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + sleepCalls.getAndIncrement(); + assertEquals(TimeUnit.SECONDS, unit); + assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); + }; + when(autoRepairState.isSuccess()).then((invocationOnMock) -> { + if (sleepCalls.get() == 0) { + return false; + } + return true; + }); + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + AutoRepair.instance.repair(repairType, 0); + + assertEquals(1, sleepCalls.get()); + verify(autoRepairState, 
Mockito.times(1)).setSucceededTokenRangesCount(14); + verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + } } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java new file mode 100644 index 000000000000..11df262f9500 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.service; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.UUID; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Suite; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.utils.FBUtilities; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; + +@RunWith(Suite.class) +@Suite.SuiteClasses({ AutoRepairServiceTest.BasicTests.class, AutoRepairServiceTest.SetterTests.class }) +public class AutoRepairServiceTest +{ + public static class BasicTests + { + private static AutoRepairService autoRepairService; + private static AutoRepairConfig config; + + @Before + public void setUp() + { + config = new AutoRepairConfig(); + autoRepairService = new AutoRepairService(); + autoRepairService.config = config; + } + + + @Test + public void testSetup() + { + AutoRepairService.instance.config = null; + + AutoRepairService.setup(); + + assertEquals(DatabaseDescriptor.getAutoRepairConfig(), AutoRepairService.instance.config); + } + + @Test + public void testGetAutoRepairConfigReturnsConfig() + { + 
assertEquals(config, autoRepairService.getAutoRepairConfig()); + } + + @Test + public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() + { + autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); + + assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); + } + + + @Test + public void testsetAutoRepairMaxRetriesCount() + { + autoRepairService.setAutoRepairMaxRetriesCount(101); + + assertEquals(101, config.getRepairMaxRetries()); + } + + + @Test + public void testsetAutoRepairRetryBackoffInSec() + { + autoRepairService.setAutoRepairRetryBackoff("102s"); + + assertEquals(102, config.getRepairRetryBackoff().toSeconds()); + } + } + + @RunWith(Parameterized.class) + public static class SetterTests extends CQLTester + { + private static final AutoRepairConfig config = new AutoRepairConfig(true); + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public T arg; + + @Parameterized.Parameter(2) + public BiConsumer setter; + + @Parameterized.Parameter(3) + public Function getter; + + @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") + public static Collection testCases() + { + return Stream.of( + forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), + forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(200, AutoRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), + forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, 
AutoRepairService.instance::setParallelRepairPercentageInGroup, config::getParallelRepairPercentageInGroup), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCountInGroup, config::getParallelRepairCountInGroup), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, SetterTests::isLocalHostForceRepair) + ).flatMap(Function.identity()).collect(Collectors.toList()); + } + + private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); + UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + + if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) + { + return ImmutableSet.of(InetAddressAndPort.getLocalHost()); + } + return ImmutableSet.of(); + } + + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) + { + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + testCases[repairType.ordinal()] = new Object[]{ repairType, arg, setter, getter }; + } + + return Arrays.stream(testCases); + } + + @BeforeClass + public static void setup() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + AutoRepairUtils.setup(); + 
AutoRepairService.instance.config = config; + } + + @Before + public void prepare() + { + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY)); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY)); + } + + @Test + public void testSetters() + { + setter.accept(repairType, arg); + assertEquals(arg, getter.apply(repairType)); + } + } +} diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java new file mode 100644 index 000000000000..2a69cefcace9 --- /dev/null +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Suite; + +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.tools.NodeProbe; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.junit.Assert.fail; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +@RunWith(Suite.class) +@Suite.SuiteClasses({ SetAutoRepairConfigTest.NoParamTests.class, SetAutoRepairConfigTest.RepairTypeParamTests.class, + SetAutoRepairConfigTest.RepairTypeAndArgsParamsTests.class }) +public class SetAutoRepairConfigTest +{ + protected static AutoRepairConfig config; + + protected static SetAutoRepairConfig cmd; + + public static void before(NodeProbe probeMock, PrintStream outMock) + { + config = new AutoRepairConfig(true); + when(probeMock.getAutoRepairConfig()).thenReturn(config); + cmd = new SetAutoRepairConfig(); + cmd.out = outMock; + } + + public static class NoParamTests + { + @Mock + private static NodeProbe probe; + + @Mock + private static PrintStream out; + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + before(probe, out); + } + + @Test + public void testHistoryDeleteHostsClearBufferInSec() + { + cmd.args = ImmutableList.of("history_clear_delete_hosts_buffer_interval", "1s"); + + cmd.execute(probe); + + verify(probe, 
times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); + + // test scenario when auto repair is disabled + when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + + cmd.execute(probe); + + // test new calls are not made when auto repair is disabled + verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); + } + + @Test + public void testRepairMaxRetries() + { + cmd.args = ImmutableList.of("repair_max_retries", "2"); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairMaxRetriesCount(2); + } + + + @Test + public void testRetryBackoffInSec() + { + cmd.args = ImmutableList.of("repair_retry_backoff", "3s"); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairRetryBackoff("3s"); + } + } + + @RunWith(Parameterized.class) + public static class RepairTypeParamTests + { + @Mock + private static NodeProbe probe; + + @Mock + private static PrintStream out; + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Object[] data() + { + return AutoRepairConfig.RepairType.values(); + } + + private static InetAddressAndPort localEndpoint; + private static InetAddressAndPort otherEndpoint; + + @Before + public void setUp() throws Exception + { + MockitoAnnotations.initMocks(this); + before(probe, out); + localEndpoint = InetAddressAndPort.getByName("127.0.0.1:7000"); + otherEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); + } + + @Test(expected = IllegalArgumentException.class) + public void testNoArgs() + { + cmd.repairType = repairType; + cmd.execute(probe); + } + + @Test + public void testRepairSchedulingDisabled() + { + when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + cmd.repairType = repairType; + cmd.args = ImmutableList.of("threads", "1"); + + cmd.execute(probe); + + verify(out, times(1)).println("Auto-repair is not enabled"); + verify(probe, 
times(0)).setRepairThreads(repairType, 1); + } + + @Test + public void testRepairTypeDisabled() + { + config.setAutoRepairEnabled(repairType, false); + cmd.repairType = repairType; + cmd.args = ImmutableList.of("number_of_repair_threads", "1"); + + cmd.execute(probe); + + verify(probe, times(1)).setRepairThreads(repairType, 1); + } + + + @Test + public void testV2FlagMissing() + { + cmd.repairType = repairType; + cmd.args = ImmutableList.of("threads", "1"); + + try + { + cmd.execute(probe); + + fail("expected IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } + + verify(probe, times(0)).setRepairThreads(repairType, 0); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidParamType() + { + cmd.repairType = repairType; + cmd.args = ImmutableList.of("unknown_type", "1"); + + cmd.execute(probe); + } + + + @Test + public void testPriorityHosts() + { + cmd.repairType = repairType; + cmd.args = ImmutableList.of("priority_hosts", String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1))); + + cmd.execute(probe); + + verify(probe, times(1)).setRepairPriorityForHosts(repairType, ImmutableSet.of(localEndpoint, otherEndpoint)); + } + + @Test + public void testForceRepairHosts() + { + cmd.repairType = repairType; + cmd.args = ImmutableList.of("forcerepair_hosts", String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1))); + + cmd.execute(probe); + + verify(probe, times(1)).setForceRepairForHosts(repairType, ImmutableSet.of(localEndpoint, otherEndpoint)); + } + } + + @RunWith(Parameterized.class) + public static class RepairTypeAndArgsParamsTests + { + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public String paramType; + + @Parameterized.Parameter(2) + public String paramVal; + + @Parameterized.Parameter(3) + public Consumer verifyFunc; + + @Parameterized.Parameters(name = 
"repairType={0},paramType={1}") + public static Collection testCases() + { + return Stream.of( + forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type, true)), + forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type, 1)), + forEachRepairType("number_of_subranges", "2", (type) -> verify(probe, times(1)).setRepairSubRangeNum(type, 2)), + forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type, "3h")), + forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type, 4)), + forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type, "5s")), + forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type, true)), + forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCountInGroup(type, 6)), + forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentageInGroup(type, 7)), + forEachRepairType("mv_repair_enabled", "true", (type) -> verify(probe, times(1)).setMVRepairEnabled(type, true)), + forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) + ).flatMap(Function.identity()).collect(Collectors.toList()); + } + + private static Stream forEachRepairType(String paramType, String paramVal, Consumer verifyFunc) + { + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + testCases[repairType.ordinal()] = new Object[]{ repairType, paramType, paramVal, verifyFunc }; + } + + return Arrays.stream(testCases); + } + + @Mock + private static NodeProbe probe; 
+ + @Mock + private static PrintStream out; + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + before(probe, out); + } + + @Test + public void test() + { + cmd.repairType = repairType; + cmd.args = ImmutableList.of(paramType, paramVal); + + cmd.execute(probe); + + verifyFunc.accept(repairType); + + // test scenario when auto repair is disabled + when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + + cmd.execute(probe); + + // test new calls are not made when auto repair is disabled + verifyFunc.accept(repairType); + } + } +} From c7eceb9969bfefb772ed72d083c921d548a6512a Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 30 Aug 2024 09:35:48 -0700 Subject: [PATCH 013/257] Extend nodetool sstablerepairedset to affect all keyspaces when no keyspace is provided in args --- .../repair/autorepair/AutoRepair.java | 1 - .../tools/nodetool/SSTableRepairedSet.java | 72 +++++++++---------- .../nodetool/SSTableRepairedSetTest.java | 55 +++++++------- 3 files changed, 61 insertions(+), 67 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 2fa1db89c386..6f9673dfdb96 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -422,7 +422,6 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType AutoRepairUtils.removePriorityStatus(repairType, myId); } - logger.info("TEST123"); repairState.setFailedTokenRangesCount(failedTokenRanges); repairState.setSucceededTokenRangesCount(succeededTokenRanges); repairState.setSkippedTokenRangesCount(skippedTokenRanges); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java index 32ee09f93590..f22f3ca0df40 100644 --- 
a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java @@ -33,7 +33,7 @@ @Command(name = "sstablerepairedset", description = "Set the repaired state of SSTables for given keyspace/tables") public class SSTableRepairedSet extends NodeTool.NodeToolCmd { - @Arguments(usage = " []", description = "The keyspace optionally followed by one or more tables", required = true) + @Arguments(usage = "[ ]", description = "Optional keyspace followed by zero or more tables") protected List args = new ArrayList<>(); @Option(title = "really-set", @@ -56,57 +56,49 @@ public void execute(NodeProbe probe) { PrintStream out = probe.output().out; + if (isRepaired == isUnrepaired) + { + out.println("Exactly one of --is-repaired or --is-unrepaired must be provided."); + return; + } + String message; if (reallySet) - { message = "Mutating repaired state of SSTables for"; - } else - { message = "Previewing repaired state mutation of SSTables for"; - } - if (args.isEmpty()) - { - out.println("At least a keyspace name must be provided."); - return; - } - String keyspace = args.get(0); + List keyspaces = parseOptionalKeyspace(args, probe, KeyspaceSet.NON_LOCAL_STRATEGY); + List tables = List.of(parseOptionalTables(args)); - List tables; - if (args.size() > 1) - { - tables = args.subList(1, args.size()); - message += " tables " + String.join(", ", tables) + " in"; - } + if (args.isEmpty()) + message += " all keyspaces"; else - { - tables = probe.getTablesForKeyspace(keyspace); - message += " all tables in"; - } - message += " keyspace " + keyspace; - - if (isRepaired == isUnrepaired) - { - out.println("Exactly one of --is-repaired or --is-unrepaired must be provided."); - return; - } + message += tables.isEmpty() ? " all tables" : " tables " + String.join(", ", tables) + + " in keyspace " + keyspaces.get(0); message += " to " + (isRepaired ? 
"repaired" : "unrepaired"); out.println(message); - try + List sstableList = new ArrayList<>(); + for (String keyspace : keyspaces) { - List mutatedSSTables = probe.mutateSSTableRepairedState(isRepaired, !reallySet, keyspace, new ArrayList<>(tables)); - if (!reallySet) - out.println("The following SSTables would be mutated:"); - else - out.println("The following SSTables were mutated:"); - for (String sstable : mutatedSSTables) - out.println(sstable); - } - catch (InvalidRequestException e) - { - out.println(e.getMessage()); + try + { + sstableList.addAll(probe.mutateSSTableRepairedState(isRepaired, !reallySet, keyspace, + tables.isEmpty() + ? probe.getTablesForKeyspace(keyspace) // mutate all tables + : tables)); // mutate specific tables + } + catch (InvalidRequestException e) + { + out.println(e.getMessage()); + } } + if (!reallySet) + out.println("The following SSTables would be mutated:"); + else + out.println("The following SSTables were mutated:"); + for (String sstable : sstableList) + out.println(sstable); } } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java index 0a2ead81b778..139cde3d839d 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -22,6 +22,7 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import org.junit.Before; import org.junit.Test; @@ -35,6 +36,7 @@ import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -46,56 +48,57 @@ public class SSTableRepairedSetTest private SSTableRepairedSet cmd; @Before - public void setUp() - { + public void 
setUp() { MockitoAnnotations.initMocks(this); - PrintStream noopStream = new PrintStream(new OutputStream() - { - @Override - public void write(int b) - { - } - }); + PrintStream noopStream = new PrintStream(new OutputStream() {@Override public void write(int b) {}}); when(probe.output()).thenReturn(new Output(noopStream, noopStream)); cmd = new SSTableRepairedSet(); } @Test - public void testNoKeyspace() - { + public void testNoKeyspace() { + when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(List.of("ks1", "ks2"))); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("ks1", "ks2"))); + when(probe.getTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(List.of("table1", "table2"))); + when(probe.getTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(List.of("table3", "table4"))); + cmd.isRepaired = true; + cmd.reallySet = true; + cmd.execute(probe); - verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); + + verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks1", List.of("table1", "table2")); + verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks2", List.of("table3", "table4")); } @Test - public void testBothRepairedAndUnrepaired() - { - cmd.args = Arrays.asList("keyspace"); + public void testBothRepairedAndUnrepaired() { + cmd.args = List.of("keyspace"); + cmd.isRepaired = true; + cmd.isUnrepaired = true; cmd.execute(probe); verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); } @Test - public void testNeitherRepairedNorUnrepaired() - { - cmd.args = Arrays.asList("keyspace"); + public void testNeitherRepairedNorUnrepaired() { + cmd.args = List.of("keyspace"); cmd.execute(probe); verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); } @Test - public void testRepairedPreview() - { - cmd.args = Arrays.asList("keyspace"); + public void testRepairedPreview() { + 
cmd.args = List.of("keyspace"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("keyspace"))); cmd.isRepaired = true; cmd.execute(probe); verify(probe).mutateSSTableRepairedState(true, true, "keyspace", new ArrayList<>()); } @Test - public void testUnrepairedReallySet() - { - cmd.args = Arrays.asList("keyspace"); + public void testUnrepairedReallySet() { + cmd.args = List.of("keyspace"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("keyspace"))); cmd.isUnrepaired = true; cmd.reallySet = true; cmd.execute(probe); @@ -103,9 +106,9 @@ public void testUnrepairedReallySet() } @Test - public void testExecuteWithTableNames() - { + public void testExecuteWithTableNames() { cmd.args = Arrays.asList("keyspace", "table1", "table2"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("keyspace"))); cmd.isRepaired = true; cmd.reallySet = true; cmd.execute(probe); From 95e6605667be3ce956451c491fec4331fc94dbbd Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 30 Aug 2024 15:16:12 -0700 Subject: [PATCH 014/257] Make repair session timeout configurable --- .../repair/autorepair/AutoRepair.java | 2 +- .../repair/autorepair/AutoRepairConfig.java | 21 +++++++++++++++---- .../repair/autorepair/AutoRepairState.java | 5 +++-- .../cassandra/service/AutoRepairService.java | 5 +++++ .../service/AutoRepairServiceMBean.java | 2 ++ .../org/apache/cassandra/tools/NodeProbe.java | 5 +++++ .../tools/nodetool/GetAutoRepairConfig.java | 1 + .../tools/nodetool/SetAutoRepairConfig.java | 9 ++++++-- .../autorepair/AutoRepairConfigTest.java | 8 +++++++ .../AutoRepairParameterizedTest.java | 8 ++++--- .../autorepair/AutoRepairStateTest.java | 5 +++-- 11 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 6f9673dfdb96..fbe94d8f7365 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -293,7 +293,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) f = repairRunnableExecutors.get(repairType).submit(task); try { - repairState.waitForRepairToComplete(); + repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); } catch (InterruptedException e) { diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index e3e7d34658e9..6e187255df77 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -296,6 +296,17 @@ public DurationSpec.IntSecondsBound getInitialSchedulerDelay(RepairType repairTy return applyOverrides(repairType, opt -> opt.initial_scheduler_delay); } + public DurationSpec.IntSecondsBound getRepairSessionTimeout(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_session_timeout); + } + + public void setRepairSessionTimeout(RepairType repairType, String repairSessionTimeout) + { + ensureOverrides(repairType); + repair_type_overrides.get(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); + } + // Options configures auto-repair behavior for a given repair type. // All fields can be modified dynamically. 
public static class Options implements Serializable @@ -331,6 +342,7 @@ protected static Options getDefaultOptions() opts.mv_repair_enabled = true; opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("15m"); // 15 minutes + opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours return opts; } @@ -398,6 +410,9 @@ protected static Options getDefaultOptions() public volatile String token_range_splitter; // the minimum delay after a node starts before the scheduler starts running repair public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; + // repair session timeout - this is applicable for each repair session + // the major issue with Repair is a session sometimes hangs; so this timeout is useful to unblock such problems + public volatile DurationSpec.IntSecondsBound repair_session_timeout; public String toString() { @@ -417,6 +432,7 @@ public String toString() ", mv_repair_enabled=" + mv_repair_enabled + ", token_range_splitter=" + token_range_splitter + ", intial_scheduler_delay=" + initial_scheduler_delay + + ", repair_session_timeout=" + repair_session_timeout + '}'; } } @@ -446,9 +462,6 @@ protected void ensureOverrides(RepairType repairType) repair_type_overrides = new EnumMap<>(RepairType.class); } - if (repair_type_overrides.get(repairType) == null) - { - repair_type_overrides.put(repairType, new Options()); - } + repair_type_overrides.computeIfAbsent(repairType, k -> new Options()); } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index c734c4bdb122..d6f7e066d41e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; +import 
org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.view.TableViews; @@ -135,10 +136,10 @@ public void progress(String tag, ProgressEvent event) } } - public void waitForRepairToComplete() throws InterruptedException + public void waitForRepairToComplete(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException { //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(12, TimeUnit.HOURS)) + if (!condition.await(repairSessionTimeout.toSeconds(), TimeUnit.SECONDS)) { success = false; } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 60073b20d4b9..cbef9889fb27 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -156,4 +156,9 @@ public void setMVRepairEnabled(RepairType repairType, boolean enabled) { config.setMVRepairEnabled(repairType, enabled); } + + public void setRepairSessionTimeout(RepairType repairType, String timeout) + { + config.setRepairSessionTimeout(repairType, timeout); + } } diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index d01c6d61db1d..4ebf118e023e 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -59,4 +59,6 @@ public interface AutoRepairServiceMBean public void setMVRepairEnabled(RepairType repairType, boolean enabled); public AutoRepairConfig getAutoRepairConfig(); + + public void setRepairSessionTimeout(RepairType repairType, String timeout); } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index f45c3a40578b..c10bd5299a24 
100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2658,6 +2658,11 @@ public List mutateSSTableRepairedState(boolean repair, boolean preview, public List getTablesForKeyspace(String keyspace) { return ssProxy.getTablesForKeyspace(keyspace); } + + public void setRepairSessionTimeout(AutoRepairConfig.RepairType repairType, String timeout) + { + autoRepairProxy.setRepairSessionTimeout(repairType, timeout); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index cfb005542efc..30dd1ff889c4 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -74,6 +74,7 @@ private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, Au sb.append("\n\tpercentage of parallel repairs within group: " + config.getParallelRepairPercentageInGroup(repairType)); sb.append("\n\tmv repair enabled: " + config.getMVRepairEnabled(repairType)); sb.append("\n\tinitial scheduler delay: " + config.getInitialSchedulerDelay(repairType)); + sb.append("\n\trepair setssion timeout: " + config.getRepairSessionTimeout(repairType)); return sb.toString(); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index bf2d290ca54d..b467d9b82998 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -19,6 +19,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Splitter; + import io.airlift.airline.Arguments; import io.airlift.airline.Command; import io.airlift.airline.Option; @@ -45,7 +46,7 @@ public class 
SetAutoRepairConfig extends NodeToolCmd "[number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + - "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries|repair_retry_backoff]", + "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries|repair_retry_backoff|repair_session_timeout]", required = true) protected List args = new ArrayList<>(); @@ -143,12 +144,16 @@ public void execute(NodeProbe probe) case "mv_repair_enabled": probe.setMVRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); break; + case "repair_session_timeout": + probe.setRepairSessionTimeout(repairType, paramVal); + break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); } } - private Set validateLocalGroupHosts(String paramVal) { + private Set validateLocalGroupHosts(String paramVal) + { Set hosts = new HashSet<>(); for (String host : Splitter.on(',').split(paramVal)) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 923e35b73796..6d25dbeb20b4 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -468,4 +468,12 @@ public void testGetInitialSchedulerDelay() assertEquals(5, result); } + @Test + public void testSetRepairSessionTimeout() + { + config.setRepairSessionTimeout(repairType, "1h"); + + assert config.repair_type_overrides.get(repairType).repair_session_timeout.toSeconds() == 3600; + } + } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java 
index 4fb18bf9e0d6..29d52adc203b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -31,6 +31,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.statements.schema.TableAttributes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -491,6 +492,7 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, false); + config.setRepairRetryBackoff("0s"); when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); AutoRepair.instance.repairStates.put(repairType, autoRepairState); @@ -508,7 +510,7 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws assertEquals("resetWaitCondition was not called before waitForRepairToComplete", resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get()); return null; - }).when(autoRepairState).waitForRepairToComplete(); + }).when(autoRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); AutoRepair.instance.repair(repairType, 0); AutoRepair.instance.repair(repairType, 0); @@ -626,7 +628,7 @@ public void testRepairTakesLastRepairTimeFromDB() @Test public void testRepairMaxRetries() { - when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); when(autoRepairState.isSuccess()).thenReturn(false); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); @@ 
-654,7 +656,7 @@ public void testRepairMaxRetries() @Test public void testRepairSuccessAfterRetry() { - when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 340a21f97bfa..657b11310f64 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -31,6 +31,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; @@ -125,7 +126,7 @@ public void testWaitForRepairToComplete() throws Exception state.condition.signalAll(); Condition finishedCondition = Condition.newOneTimeCondition(); Callable waitForRepairToComplete = () -> { - state.waitForRepairToComplete(); + state.waitForRepairToComplete(new DurationSpec.IntSecondsBound("12h")); finishedCondition.signalAll(); return null; }; @@ -363,7 +364,7 @@ public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesErro state.progress("test", progressEvent); assertFalse(state.success); - state.waitForRepairToComplete(); + state.waitForRepairToComplete(new DurationSpec.IntSecondsBound("12h")); assertFalse(state.success); } From 203b35c112b9b87dd214e7c9a8de044f256bdcbc Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 5 Sep 2024 18:31:09 -0700 Subject: [PATCH 015/257] 
Comments from Joshua M --- .../repair/autorepair/AutoRepairConfig.java | 29 +++++++++---------- .../autorepair/AutoRepairConfigTest.java | 10 +++---- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 6e187255df77..97868a594f25 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -238,24 +238,24 @@ public void setRepairPrimaryTokenRangeOnly(RepairType repairType, boolean primar public int getParallelRepairPercentageInGroup(RepairType repairType) { - return applyOverrides(repairType, opt -> opt.parallel_repair_percentage_in_group); + return applyOverrides(repairType, opt -> opt.parallel_repair_percentage); } public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).parallel_repair_percentage_in_group = percentageInGroup; + repair_type_overrides.get(repairType).parallel_repair_percentage = percentageInGroup; } public int getParallelRepairCountInGroup(RepairType repairType) { - return applyOverrides(repairType, opt -> opt.parallel_repair_count_in_group); + return applyOverrides(repairType, opt -> opt.parallel_repair_count); } public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).parallel_repair_count_in_group = countInGroup; + repair_type_overrides.get(repairType).parallel_repair_count = countInGroup; } public boolean getMVRepairEnabled(RepairType repairType) @@ -331,15 +331,15 @@ protected static Options getDefaultOptions() opts.repair_by_keyspace = false; opts.number_of_subranges = 16; opts.number_of_repair_threads = 1; - opts.parallel_repair_count_in_group = 1; - 
opts.parallel_repair_percentage_in_group = 0; + opts.parallel_repair_count = 3; + opts.parallel_repair_percentage = 3; opts.sstable_upper_threshold = 10000; opts.min_repair_interval = new DurationSpec.IntSecondsBound("24h"); opts.ignore_dcs = new HashSet<>(); opts.repair_primary_token_range_only = true; opts.force_repair_new_node = false; opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); - opts.mv_repair_enabled = true; + opts.mv_repair_enabled = false; opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("15m"); // 15 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours @@ -362,13 +362,12 @@ protected static Options getDefaultOptions() // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. // This is similar to -j for repair options for the nodetool repair command. public volatile Integer number_of_repair_threads; - // the number of repair sessions that can run in parallel in a single group - // The number of nodes running repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. The default is 3. + // The number of nodes running repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. The default is 3. // This configuration controls how many nodes would run repair in parallel. // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. // If one or more node(s) finish repair, then the framework automatically picks up the next candidate and ensures the maximum number of nodes running repair do not exceed “3”. 
- public volatile Integer parallel_repair_count_in_group; - // the number of repair sessions that can run in parallel in a single groupas a percentage + public volatile Integer parallel_repair_count; + // the number of repair nodes that can run in parallel // of the total number of nodes in the group [0,100] // The percentage of nodes in the cluster that run repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. // The problem with a fixed number of nodes (the above property) is that in a large-scale environment, @@ -376,7 +375,7 @@ protected static Options getDefaultOptions() // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers - public volatile Integer parallel_repair_percentage_in_group; + public volatile Integer parallel_repair_percentage; // the upper threshold of SSTables allowed to participate in a single repair session // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. // This is to avoid penalizing good tables (neighbors) with an outlier. 
@@ -421,8 +420,8 @@ public String toString() ", repair_by_keyspace=" + repair_by_keyspace + ", number_of_subranges=" + number_of_subranges + ", number_of_repair_threads=" + number_of_repair_threads + - ", parallel_repair_count_in_group=" + parallel_repair_count_in_group + - ", parallel_repair_percentage_in_group=" + parallel_repair_percentage_in_group + + ", parallel_repair_count_in_group=" + parallel_repair_count + + ", parallel_repair_percentage_in_group=" + parallel_repair_percentage + ", sstable_upper_threshold=" + sstable_upper_threshold + ", min_repair_interval=" + min_repair_interval + ", ignore_dcs=" + ignore_dcs + @@ -432,7 +431,7 @@ public String toString() ", mv_repair_enabled=" + mv_repair_enabled + ", token_range_splitter=" + token_range_splitter + ", intial_scheduler_delay=" + initial_scheduler_delay + - ", repair_session_timeout=" + repair_session_timeout + + ", repair_session_timeout=" + repair_session_timeout + '}'; } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 6d25dbeb20b4..07eb9070fe85 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -340,7 +340,7 @@ public void testSetRepairPrimaryTokenRangeOnly() @Test public void testGetParallelRepairPercentageInGroup() { - config.global_settings.parallel_repair_percentage_in_group = 5; + config.global_settings.parallel_repair_percentage = 5; int result = config.getParallelRepairPercentageInGroup(repairType); @@ -352,13 +352,13 @@ public void testSetParallelRepairPercentageInGroup() { config.setParallelRepairPercentageInGroup(repairType, 5); - assert config.repair_type_overrides.get(repairType).parallel_repair_percentage_in_group == 5; + assert config.repair_type_overrides.get(repairType).parallel_repair_percentage == 5; } @Test public void 
testGetParallelRepairCountInGroup() { - config.global_settings.parallel_repair_count_in_group = 5; + config.global_settings.parallel_repair_count = 5; int result = config.getParallelRepairCountInGroup(repairType); @@ -370,7 +370,7 @@ public void testSetParallelRepairCountInGroup() { config.setParallelRepairCountInGroup(repairType, 5); - assert config.repair_type_overrides.get(repairType).parallel_repair_count_in_group == 5; + assert config.repair_type_overrides.get(repairType).parallel_repair_count == 5; } @Test @@ -432,7 +432,7 @@ public void testGetDefaultOptionsMVRepairIsEnabledByDefault() { Options defaultOptions = Options.getDefaultOptions(); - assertTrue(defaultOptions.mv_repair_enabled); + assertFalse(defaultOptions.mv_repair_enabled); } @Test From 7b4a96d110686ca12323004066d1906d976ecae6 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sat, 7 Sep 2024 16:32:20 -0700 Subject: [PATCH 016/257] AutoRepair scheduler InJvm dtest --- .../repair/autorepair/AutoRepair.java | 4 +- .../tools/nodetool/SSTableRepairedSet.java | 3 +- .../test/repair/AutoRepairSchedulerTest.java | 128 ++++++++++++++++++ .../config/DatabaseDescriptorRefTest.java | 9 ++ .../config/YamlConfigurationLoaderTest.java | 10 +- .../AutoRepairParameterizedTest.java | 1 - .../nodetool/SSTableRepairedSetTest.java | 27 ++-- 7 files changed, 159 insertions(+), 23 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index fbe94d8f7365..1a3c5071db36 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -115,7 +115,7 @@ public void setup() for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { repairExecutors.get(repairType).scheduleWithFixedDelay( - () -> 
repair(repairType, 60000), + () -> repair(repairType, 5000), config.getInitialSchedulerDelay(repairType).toSeconds(), config.getRepairCheckInterval().toSeconds(), TimeUnit.SECONDS); @@ -443,7 +443,7 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType if (timeInHours == 0 && millisToWait > 0) { //If repair finished quickly, happens for an empty instance, in such case - //wait for a minute so that the JMX metrics can detect the repairInProgress + //wait for some duration so that the JMX metrics can detect the repairInProgress logger.info("Wait for {} milliseconds for repair type {}.", millisToWait, repairType); Thread.sleep(millisToWait); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java index f22f3ca0df40..e1773fe47b7f 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java @@ -20,6 +20,7 @@ import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; @@ -69,7 +70,7 @@ public void execute(NodeProbe probe) message = "Previewing repaired state mutation of SSTables for"; List keyspaces = parseOptionalKeyspace(args, probe, KeyspaceSet.NON_LOCAL_STRATEGY); - List tables = List.of(parseOptionalTables(args)); + List tables = new ArrayList<>(Arrays.asList(parseOptionalTables(args))); if (args.isEmpty()) message += " all keyspaces"; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java new file mode 100644 index 000000000000..49b4300dc50b --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.repair; + +import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; +import com.google.common.util.concurrent.Uninterruptibles; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; + +import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; +import static org.junit.Assert.assertEquals; + +public class AutoRepairSchedulerTest extends TestBaseImpl +{ + + private static Cluster cluster; + static SimpleDateFormat sdf; + + @BeforeClass + public static void init() throws IOException + { + // Define the expected date format pattern + String pattern = "EEE MMM dd HH:mm:ss z yyyy"; + // Create SimpleDateFormat object with the given pattern + sdf = new 
SimpleDateFormat(pattern); + sdf.setLenient(false); + cluster = Cluster.build(3).withConfig(config -> config + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.full.toString(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "1", + "parallel_repair_percentage", "0", + "min_repair_interval", "1s"), + AutoRepairConfig.RepairType.incremental.toString(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "1", + "parallel_repair_percentage", "0", + "min_repair_interval", "1s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.repair_check_interval", "10s")).start(); + + cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); + } + + @Test + public void testScheduler() throws ParseException + { + // ensure there was no history of previous repair runs through the scheduler + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY), ConsistencyLevel.QUORUM); + assertEquals(0, rows.length); + + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + AutoRepair.instance.setup(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); + // wait for a couple of minutes for repair to go through on all three nodes + Uninterruptibles.sleepUninterruptibly(2, TimeUnit.MINUTES); + + validate(AutoRepairConfig.RepairType.full.toString()); + validate(AutoRepairConfig.RepairType.incremental.toString()); + } + + private void validate(String repairType) throws ParseException + { + Object[][] rows = 
cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); + assertEquals(3, rows.length); + for (int node = 0; node < rows.length; node++) + { + Object[] row = rows[node]; + // repair_type + Assert.assertEquals(repairType, row[0].toString()); + // host_id + Assert.assertEquals(String.format("00000000-0000-4000-8000-%012d", node + 1), row[1].toString()); + // ensure there is a legit repair_start_ts and repair_finish_ts + sdf.parse(row[2].toString()); + sdf.parse(row[3].toString()); + // the reason why the repair was scheduled + Assert.assertEquals("MY_TURN", row[4].toString()); + for (Object col : row) + { + System.out.println("Data:" + col); + } + System.out.println("====================================="); + } + } +} diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 7d27271a0dbd..d62e88091635 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -114,6 +114,14 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$CorruptedTombstoneStrategy", "org.apache.cassandra.config.Config$BatchlogEndpointStrategy", "org.apache.cassandra.config.Config$TombstonesMetricGranularity", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig$Options", + "org.apache.cassandra.repair.autorepair.DefaultAutoRepairTokenSplitter", + "org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter", + "org.apache.cassandra.repair.autorepair.FullRepairState", + "org.apache.cassandra.repair.autorepair.IncrementalRepairState", + 
"org.apache.cassandra.repair.autorepair.AutoRepairConfig$RepairType", + "org.apache.cassandra.repair.autorepair.AutoRepairState", "org.apache.cassandra.config.DatabaseDescriptor$ByteUnit", "org.apache.cassandra.config.DataRateSpec", "org.apache.cassandra.config.DataRateSpec$DataRateUnit", @@ -338,6 +346,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.utils.concurrent.RefCounted", "org.apache.cassandra.utils.concurrent.SelfRefCounted", "org.apache.cassandra.utils.concurrent.Transactional", + "org.apache.cassandra.utils.progress.ProgressListener", "org.apache.cassandra.utils.concurrent.UncheckedInterruptedException", }; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 85433c1efe0d..189c5f3bd901 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -279,11 +279,11 @@ public void fromMapTest() "optional", false, "enabled", true); Map autoRepairConfig = ImmutableMap.of("enabled", true, - "global_settings", ImmutableMap.of("repair_dc_groups", - ImmutableSet.of("all the groups")), + "global_settings", ImmutableMap.of("number_of_repair_threads", + 1), "repair_type_overrides", ImmutableMap.of( - "full", ImmutableMap.of("repair_dc_groups", - ImmutableSet.of("none of the groups")))); + "full", ImmutableMap.of("number_of_repair_threads", + 2))); Map map = new ImmutableMap.Builder() .put("storage_port", storagePort) .put("commitlog_sync", commitLogSync) @@ -304,7 +304,7 @@ public void fromMapTest() assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) 
assertEquals(true, config.auto_repair.enabled); - assertEquals(6 * 60 * 60L, config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.incremental)); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.incremental)); config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.incremental, false); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 29d52adc203b..bd1f7eb8cf3e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -31,7 +31,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; -import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.statements.schema.TableAttributes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java index 139cde3d839d..bc168c0752d7 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -22,7 +22,6 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; -import java.util.List; import org.junit.Before; import org.junit.Test; @@ -57,22 +56,22 @@ public void setUp() { @Test public void testNoKeyspace() { - when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(List.of("ks1", "ks2"))); - when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("ks1", "ks2"))); - when(probe.getTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(List.of("table1", "table2"))); - 
when(probe.getTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(List.of("table3", "table4"))); + when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); + when(probe.getTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); + when(probe.getTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(Arrays.asList("table3", "table4"))); cmd.isRepaired = true; cmd.reallySet = true; cmd.execute(probe); - verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks1", List.of("table1", "table2")); - verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks2", List.of("table3", "table4")); + verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks1", Arrays.asList("table1", "table2")); + verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks2", Arrays.asList("table3", "table4")); } @Test public void testBothRepairedAndUnrepaired() { - cmd.args = List.of("keyspace"); + cmd.args = Arrays.asList("keyspace"); cmd.isRepaired = true; cmd.isUnrepaired = true; cmd.execute(probe); @@ -81,15 +80,15 @@ public void testBothRepairedAndUnrepaired() { @Test public void testNeitherRepairedNorUnrepaired() { - cmd.args = List.of("keyspace"); + cmd.args = Arrays.asList("keyspace"); cmd.execute(probe); verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); } @Test public void testRepairedPreview() { - cmd.args = List.of("keyspace"); - when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("keyspace"))); + cmd.args = Arrays.asList("keyspace"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isRepaired = true; cmd.execute(probe); verify(probe).mutateSSTableRepairedState(true, true, "keyspace", new ArrayList<>()); @@ -97,8 +96,8 @@ public void testRepairedPreview() { @Test public void 
testUnrepairedReallySet() { - cmd.args = List.of("keyspace"); - when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("keyspace"))); + cmd.args = Arrays.asList("keyspace"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isUnrepaired = true; cmd.reallySet = true; cmd.execute(probe); @@ -108,7 +107,7 @@ public void testUnrepairedReallySet() { @Test public void testExecuteWithTableNames() { cmd.args = Arrays.asList("keyspace", "table1", "table2"); - when(probe.getKeyspaces()).thenReturn(new ArrayList<>(List.of("keyspace"))); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isRepaired = true; cmd.reallySet = true; cmd.execute(probe); From b261be080304176727b2b57c7e44765ecb3b40f3 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 23 Sep 2024 18:19:36 -0700 Subject: [PATCH 017/257] Increase CircleCI parallelism 1-->4 --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 864919b8f418..553e0e32a8a7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5160,7 +5160,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra From 89a11992281f50a72073660b781431efcc593c9b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 24 Sep 2024 14:07:04 -0700 Subject: [PATCH 018/257] Fix a test case failure org.apache.cassandra.schema.SchemaKeyspaceTest::testConversionsInverses --- src/java/org/apache/cassandra/schema/TableParams.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index c73a914de12c..3dabf47da264 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -313,7 
+313,7 @@ public boolean equals(Object o) && transactionalMode == p.transactionalMode && transactionalMigrationFrom == p.transactionalMigrationFrom && pendingDrop == p.pendingDrop - && automatedRepair == p.automatedRepair; + && automatedRepair.equals(p.automatedRepair); } @Override From c886358a4e0e002c8e36800b0c7cf47d8f447447 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 24 Sep 2024 14:29:08 -0700 Subject: [PATCH 019/257] Trigger automatic CircleCI jobs --- .circleci/config.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 553e0e32a8a7..456bd04063e6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9477,18 +9477,15 @@ workflows: java11_separate_tests: jobs: - start_j11_build: - type: approval - j11_build: requires: - start_j11_build - start_j11_unit_tests: - type: approval - j11_unit_tests: requires: - start_j11_unit_tests - j11_build - start_j11_jvm_dtests: - type: approval - j11_jvm_dtests: requires: - start_j11_jvm_dtests @@ -9518,13 +9515,11 @@ workflows: - start_j11_simulator_dtests - j11_build - start_j11_cqlshlib_tests: - type: approval - j11_cqlshlib_tests: requires: - start_j11_cqlshlib_tests - j11_build - start_j11_cqlshlib_cython_tests: - type: approval - j11_cqlshlib_cython_tests: requires: - start_j11_cqlshlib_cython_tests @@ -9656,13 +9651,11 @@ workflows: - start_jvm_upgrade_dtests - j11_dtest_jars_build - start_j11_dtests: - type: approval - j11_dtests: requires: - start_j11_dtests - j11_build - start_j11_dtests_vnode: - type: approval - j11_dtests_vnode: requires: - start_j11_dtests_vnode @@ -9692,13 +9685,11 @@ workflows: - start_j17_dtests_latest - j11_build - start_j11_dtests_large: - type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build - start_j11_dtests_large_vnode: - type: approval - j11_dtests_large_vnode: requires: - start_j11_dtests_large_vnode @@ -9780,7 +9771,6 @@ workflows: java11_pre-commit_tests: jobs: - 
start_pre-commit_tests: - type: approval - j11_build: requires: - start_pre-commit_tests @@ -9916,7 +9906,6 @@ workflows: requires: - j11_build - start_j11_dtests_large: - type: approval - j11_dtests_large: requires: - start_j11_dtests_large From 0f4d41f8f6828a9967a4a73860dd5fc28fd2dba8 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 24 Sep 2024 14:31:09 -0700 Subject: [PATCH 020/257] CircleCI ask for a manual approval --- .circleci/config.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 456bd04063e6..553e0e32a8a7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9477,15 +9477,18 @@ workflows: java11_separate_tests: jobs: - start_j11_build: + type: approval - j11_build: requires: - start_j11_build - start_j11_unit_tests: + type: approval - j11_unit_tests: requires: - start_j11_unit_tests - j11_build - start_j11_jvm_dtests: + type: approval - j11_jvm_dtests: requires: - start_j11_jvm_dtests @@ -9515,11 +9518,13 @@ workflows: - start_j11_simulator_dtests - j11_build - start_j11_cqlshlib_tests: + type: approval - j11_cqlshlib_tests: requires: - start_j11_cqlshlib_tests - j11_build - start_j11_cqlshlib_cython_tests: + type: approval - j11_cqlshlib_cython_tests: requires: - start_j11_cqlshlib_cython_tests @@ -9651,11 +9656,13 @@ workflows: - start_jvm_upgrade_dtests - j11_dtest_jars_build - start_j11_dtests: + type: approval - j11_dtests: requires: - start_j11_dtests - j11_build - start_j11_dtests_vnode: + type: approval - j11_dtests_vnode: requires: - start_j11_dtests_vnode @@ -9685,11 +9692,13 @@ workflows: - start_j17_dtests_latest - j11_build - start_j11_dtests_large: + type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build - start_j11_dtests_large_vnode: + type: approval - j11_dtests_large_vnode: requires: - start_j11_dtests_large_vnode @@ -9771,6 +9780,7 @@ workflows: java11_pre-commit_tests: jobs: - start_pre-commit_tests: + type: approval - 
j11_build: requires: - start_pre-commit_tests @@ -9906,6 +9916,7 @@ workflows: requires: - j11_build - start_j11_dtests_large: + type: approval - j11_dtests_large: requires: - start_j11_dtests_large From 8272926c87296dfe1ead1b0479ce9f19177e7105 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Tue, 24 Sep 2024 20:27:33 -0700 Subject: [PATCH 021/257] Fix the JMXStandardsTest::interfaces --- .../apache/cassandra/repair/autorepair/AutoRepairConfig.java | 2 +- .../apache/cassandra/config/YamlConfigurationLoaderTest.java | 1 - test/unit/org/apache/cassandra/tools/JMXStandardsTest.java | 5 +++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 97868a594f25..5c839063c036 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -56,7 +56,7 @@ public class AutoRepairConfig implements Serializable // global_settings overides Options.defaultOptions for all repair types public volatile Options global_settings; - public enum RepairType + public enum RepairType implements Serializable { full, incremental; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 189c5f3bd901..9a0203c301bd 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -32,7 +32,6 @@ import java.util.function.Predicate; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import org.junit.Test; import com.fasterxml.jackson.core.JsonProcessingException; diff --git a/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java b/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java index 
62b6b9b5ec16..df0a2c1be2ea 100644 --- a/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java +++ b/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java @@ -51,6 +51,8 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.utils.BreaksJMX; import org.assertj.core.api.Assertions; import org.reflections.Reflections; @@ -100,6 +102,9 @@ public class JMXStandardsTest .add(ClassNotFoundException.class) .add(OpenDataException.class) .add(InvalidRequestException.class) + .add(AutoRepairConfig.RepairType.class) + .add(InetAddressAndPort.class) + .add(AutoRepairConfig.class) .build(); /** * This list is a set of types under java.* and javax.*, but are too vague that could cause issues; this does not From a1dbabb62bdd5e5d7ecf6a8c30e3cf87394f8972 Mon Sep 17 00:00:00 2001 From: Jaydeepkumar Chovatia Date: Tue, 24 Sep 2024 21:00:25 -0700 Subject: [PATCH 022/257] Circle AutoStart J8 tests From fb988dae442ed911eda908dcd6cb0b18713eba9a Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Wed, 25 Sep 2024 21:15:17 -0700 Subject: [PATCH 023/257] Allow IR to run with MVs and CDC if write path replay is disabled --- .../config/CassandraRelevantProperties.java | 11 ++ .../db/streaming/CassandraStreamReceiver.java | 7 +- .../repair/autorepair/AutoRepair.java | 16 +-- .../repair/autorepair/AutoRepairConfig.java | 6 - .../cassandra/service/AutoRepairService.java | 18 +++ .../cassandra/streaming/StreamOperation.java | 4 +- .../streaming/CassandraSreamReceiverTest.java | 136 ++++++++++++++++++ .../autorepair/AutoRepairConfigTest.java | 57 -------- .../AutoRepairParameterizedTest.java | 58 ++++++++ .../repair/autorepair/AutoRepairTest.java | 16 ++- .../service/AutoRepairServiceTest.java | 63 +++++++- 11 files changed, 310 insertions(+), 82 deletions(-) create mode 100644 
test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 7459fecb8c34..6b800d10d8fe 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -674,6 +674,17 @@ public enum CassandraRelevantProperties /** Gossiper compute expiration timeout. Default value 3 days. */ VERY_LONG_TIME_MS("cassandra.very_long_time_ms", "259200000"), WAIT_FOR_TRACING_EVENTS_TIMEOUT_SECS("cassandra.wait_for_tracing_events_timeout_secs", "0"), + /** + * If set to true, mutations streamed during anti-entropy repair will be replayed via the regular write path for associated views. + */ + STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR("cassandra.streaming.requires_view_build_during_repair", "true"), + + /** + * If set to true, streamed mutations via the regular write path for CDC. 
+ * Deprecate this property in trunk (or 5.0) as a new config has been added to control this https://issues.apache.org/jira/browse/CASSANDRA-17666 + */ + STREAMING_REQUIRES_CDC_REPLAY("cassandra.streaming.requires_cdc_replay", "true"), + ; static diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 36baaacef5ea..c53ad5387e9a 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -25,6 +25,11 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; + +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.io.sstable.SSTable; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -205,7 +210,7 @@ private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) * For CDC-enabled tables and write path for CDC is enabled, we want to ensure that the mutations are * run through the CommitLog, so they can be archived by the CDC process on discard. 
*/ - private boolean requiresWritePath(ColumnFamilyStore cfs) + public boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) || cfs.streamToMemtable() diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1a3c5071db36..cef97595cffc 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -48,7 +48,6 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; @@ -106,14 +105,15 @@ protected AutoRepair() public void setup() { - verifyIsSafeToEnable(); - AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); AutoRepairService.setup(); AutoRepairUtils.setup(); for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { + if (config.isAutoRepairEnabled(repairType)) + AutoRepairService.instance.checkCanRun(repairType); + repairExecutors.get(repairType).scheduleWithFixedDelay( () -> repair(repairType, 5000), config.getInitialSchedulerDelay(repairType).toSeconds(), @@ -122,15 +122,6 @@ public void setup() } } - @VisibleForTesting - protected void verifyIsSafeToEnable() - { - AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); - if (config.isAutoRepairEnabled(AutoRepairConfig.RepairType.incremental) && - (DatabaseDescriptor.getMaterializedViewsEnabled() || DatabaseDescriptor.isCDCEnabled())) - throw new ConfigurationException("Cannot enable incremental repair with materialized views or CDC enabled"); - } - // repairAsync runs a repair session of the given type asynchronously. 
public void repairAsync(AutoRepairConfig.RepairType repairType, long millisToWait) { @@ -146,6 +137,7 @@ public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) logger.debug("Auto-repair is disabled for repair type {}", repairType); return; } + AutoRepairService.instance.checkCanRun(repairType); AutoRepairState repairState = repairStates.get(repairType); try { diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 5c839063c036..b43262e9a4e4 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -30,9 +30,7 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.DurationSpec; -import org.apache.cassandra.exceptions.ConfigurationException; public class AutoRepairConfig implements Serializable { @@ -140,10 +138,6 @@ public boolean isAutoRepairEnabled(RepairType repairType) public void setAutoRepairEnabled(RepairType repairType, boolean enabled) { - if (enabled && repairType == RepairType.incremental && - (DatabaseDescriptor.getMaterializedViewsEnabled() || DatabaseDescriptor.isCDCEnabled())) - throw new ConfigurationException("Cannot enable incremental repair with materialized views or CDC enabled"); - ensureOverrides(repairType); repair_type_overrides.get(repairType).enabled = enabled; } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index cbef9889fb27..3d59207a8c28 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -17,7 +17,9 @@ */ package org.apache.cassandra.service; +import org.apache.cassandra.config.CassandraRelevantProperties; import 
org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; @@ -52,6 +54,21 @@ public static void setup() MBeanWrapper.instance.registerMBean(instance, MBEAN_NAME); } + public void checkCanRun(RepairType repairType) + { + if (!config.isAutoRepairSchedulingEnabled()) + throw new ConfigurationException("Auto-repair scheduler is disabled."); + + if (repairType != RepairType.incremental) + return; + + if (CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.getBoolean()) + throw new ConfigurationException("Cannot run incremental repair while materialized view replay is enabled."); + + if (CassandraRelevantProperties.STREAMING_REQUIRES_CDC_REPLAY.getBoolean()) + throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled."); + } + @Override public AutoRepairConfig getAutoRepairConfig() { @@ -61,6 +78,7 @@ public AutoRepairConfig getAutoRepairConfig() @Override public void setAutoRepairEnabled(RepairType repairType, boolean enabled) { + checkCanRun(repairType); config.setAutoRepairEnabled(repairType, enabled); } diff --git a/src/java/org/apache/cassandra/streaming/StreamOperation.java b/src/java/org/apache/cassandra/streaming/StreamOperation.java index b1c5908f7fe8..2ed83addaf84 100644 --- a/src/java/org/apache/cassandra/streaming/StreamOperation.java +++ b/src/java/org/apache/cassandra/streaming/StreamOperation.java @@ -17,6 +17,8 @@ */ package org.apache.cassandra.streaming; +import org.apache.cassandra.config.CassandraRelevantProperties; + public enum StreamOperation { OTHER("Other", true, false, false), // Fallback to avoid null types when deserializing from string @@ -26,7 +28,7 @@ public enum StreamOperation BOOTSTRAP("Bootstrap", false, true, false), REBUILD("Rebuild",
false, true, false), BULK_LOAD("Bulk Load", true, false, false), - REPAIR("Repair", true, false, true); + REPAIR("Repair", CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.getBoolean(), false, true); private final String description; private final boolean requiresViewBuild; diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java new file mode 100644 index 000000000000..0c3eeb37b012 --- /dev/null +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.db.streaming; + +import java.util.Collections; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.streaming.StreamOperation; +import org.apache.cassandra.streaming.StreamSession; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +public class CassandraSreamReceiverTest extends CQLTester +{ + @Mock + private StreamSession session; + + private static final String CDC_TABLE = "cdc_table"; + private static final String MV_TABLE = "mv_table"; + private static final String CDC_MV_TABLE = "cdc_mv_table"; + private static final String NO_CDC_MV_TABLE = "no_cdc_mv_table"; + + @BeforeClass + public static void beforeClass() + { + System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + } + + @Before + public void setup() + { + MockitoAnnotations.initMocks(this); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=true", KEYSPACE, CDC_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=false", KEYSPACE, MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW IF NOT EXISTS %s.mv AS SELECT * FROM %s.%s WHERE pk IS NOT NULL PRIMARY KEY (pk)", KEYSPACE, KEYSPACE, MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=true", KEYSPACE, CDC_MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW IF NOT EXISTS %s.mv2 AS SELECT * FROM %s.%s WHERE pk IS NOT NULL PRIMARY 
KEY (pk)", KEYSPACE, KEYSPACE, CDC_MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=false", KEYSPACE, NO_CDC_MV_TABLE)); + } + + @Test + public void testRequiresWritePathRepair() + { + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathBulkLoad() + { + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathNoCDCOrMV() + { + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(NO_CDC_MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertFalse(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathRepairMVOnly() + { + // MV_TABLE is a cdc=false base table with a materialized view; requiresWritePath is called directly + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathRepairCDCWithSystemProp() + { + System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); + + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE);
+ when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertTrue(receiver.requiresWritePath(cfs)); + + System.clearProperty("cassandra.streaming.requires_cdc_replay"); + } + + @Test + public void testDoesNotRequiresWritePathRepairCDCOnly() + { + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); + CassandraStreamReceiver receiver1 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + assertFalse(receiver1.requiresWritePath(cfs)); + + System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); + CassandraStreamReceiver receiver2 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + assertTrue(receiver2.requiresWritePath(cfs)); + + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 07eb9070fe85..da7154077bc2 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -38,7 +38,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -119,62 +118,6 @@ public void testSetAutoRepairEnabledNoMVOrCDC() assertTrue(config.repair_type_overrides.get(repairType).enabled); } - @Test - public void testSetAutoRepairEnabledWithMV() - { - DatabaseDescriptor.setCDCEnabled(false); - DatabaseDescriptor.setMaterializedViewsEnabled(true); - - try - { - config.setAutoRepairEnabled(repairType, true); - - if (repairType == AutoRepairConfig.RepairType.incremental) - { - 
assertFalse(config.repair_type_overrides.get(repairType).enabled); // IR should not be allowed with MV - assertNotEquals(AutoRepairConfig.RepairType.incremental, repairType); // should receive exception - } - else - { - assertTrue(config.repair_type_overrides.get(repairType).enabled); - } - } - catch (ConfigurationException e) - { - // should throw only if repairType is incremental - assertEquals(AutoRepairConfig.RepairType.incremental, repairType); - } - } - - @Test - public void testSetAutoRepairEnabledWithCDC() - { - DatabaseDescriptor.setCDCEnabled(true); - DatabaseDescriptor.setMaterializedViewsEnabled(false); - - try - { - config.setAutoRepairEnabled(repairType, true); - - - if (repairType == AutoRepairConfig.RepairType.incremental) - { - assertFalse(config.repair_type_overrides.get(repairType).enabled); // IR should not be allowed with CDC - assertNotEquals(AutoRepairConfig.RepairType.incremental, repairType); // should receive exception - } - else - { - assertTrue(config.repair_type_overrides.get(repairType).enabled); - } - } - catch (ConfigurationException e) - { - // should throw only if repairType is incremental - assertEquals(AutoRepairConfig.RepairType.incremental, repairType); - } - } - - @Test public void testSetRepairByKeyspace() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index bd1f7eb8cf3e..21ee14bfb079 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -35,11 +35,13 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.schema.AutoRepairParams; import org.apache.cassandra.schema.TableParams; import 
org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Pair; +import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -77,6 +79,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.doAnswer; @@ -156,6 +159,8 @@ public static void setupClass() throws Exception @Before public void setup() { + System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); MockitoAnnotations.initMocks(this); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); @@ -179,6 +184,12 @@ public void setup() AutoRepair.shuffleFunc = java.util.Collections::shuffle; } + @After + public void tearDown() + { + System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); + System.clearProperty("cassandra.streaming.requires_cdc_replay"); + } private void resetCounters() { @@ -679,4 +690,51 @@ public void testRepairSuccessAfterRetry() verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } + + @Test + public void testRepairThrowsForIRWithMVReplay() + { + AutoRepair.instance.setup(); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + + if (repairType == AutoRepairConfig.RepairType.incremental) + { + try + { + AutoRepair.instance.repair(repairType, 0); + fail("Expected ConfigurationException"); + } + catch (ConfigurationException ignored) + { + } + } + else + { + AutoRepair.instance.repair(repairType, 0); + } + } + + + @Test + public void testRepairThrowsForIRWithCDCReplay() + { + AutoRepair.instance.setup(); + 
System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); + + if (repairType == AutoRepairConfig.RepairType.incremental) + { + try + { + AutoRepair.instance.repair(repairType, 0); + fail("Expected ConfigurationException"); + } + catch (ConfigurationException ignored) + { + } + } + else + { + AutoRepair.instance.repair(repairType, 0); + } + } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index c1837aa6a2cc..3592186365d1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -47,6 +48,13 @@ public static void setupClass() throws Exception requireNetwork(); } + @Before + public void setup() + { + System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); + System.clearProperty("cassandra.streaming.requires_cdc_replay"); + } + @Test public void testSetup() { @@ -64,9 +72,10 @@ public void testSetup() } @Test(expected = ConfigurationException.class) - public void testSetupFailsWhenIREnabledWithCDC() + public void testSetupFailsWhenIREnabledWithCDCReplay() { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); DatabaseDescriptor.setCDCEnabled(true); AutoRepair instance = new AutoRepair(); @@ -74,11 +83,10 @@ public void testSetupFailsWhenIREnabledWithCDC() } @Test(expected = ConfigurationException.class) - public void testSetupFailsWhenIREnabledWithMV() + public void testSetupFailsWhenIREnabledWithMVReplay() { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); - DatabaseDescriptor.setMaterializedViewsEnabled(true); - + 
System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); AutoRepair instance = new AutoRepair(); instance.setup(); } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java index 11df262f9500..1f12ad08ab65 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java @@ -28,6 +28,7 @@ import java.util.stream.Stream; import com.google.common.collect.ImmutableSet; +import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -39,6 +40,7 @@ import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; @@ -54,7 +56,7 @@ @Suite.SuiteClasses({ AutoRepairServiceTest.BasicTests.class, AutoRepairServiceTest.SetterTests.class }) public class AutoRepairServiceTest { - public static class BasicTests + public static class BasicTests extends CQLTester { private static AutoRepairService autoRepairService; private static AutoRepairConfig config; @@ -62,11 +64,21 @@ public static class BasicTests @Before public void setUp() { + System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); config = new AutoRepairConfig(); autoRepairService = new AutoRepairService(); autoRepairService.config = config; } + @After + public void tearDown() + { + System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); + 
System.clearProperty("cassandra.streaming.requires_cdc_replay"); + } @Test public void testSetup() @@ -109,6 +121,53 @@ public void testsetAutoRepairRetryBackoffInSec() assertEquals(102, config.getRepairRetryBackoff().toSeconds()); } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() + { + autoRepairService.config = new AutoRepairConfig(false); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() + { + autoRepairService.config = new AutoRepairConfig(true); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() + { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() + { + autoRepairService.config = new AutoRepairConfig(true); + System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() + { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCEnabled(true); + System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + + 
autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } } @RunWith(Parameterized.class) @@ -196,6 +255,8 @@ public void prepare() @Test public void testSetters() { + System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); setter.accept(repairType, arg); assertEquals(arg, getter.apply(repairType)); } From c701b80f21f11cab61c86ed016d8e2bc71680350 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 27 Sep 2024 16:10:43 -0700 Subject: [PATCH 024/257] Add a nodetool command to get the nodes going through repair --- .../repair/autorepair/AutoRepair.java | 5 + .../repair/autorepair/AutoRepairConfig.java | 19 ++-- .../repair/autorepair/AutoRepairUtils.java | 27 ++--- .../cassandra/service/AutoRepairService.java | 28 ++++- .../service/AutoRepairServiceMBean.java | 6 ++ .../org/apache/cassandra/tools/NodeProbe.java | 5 + .../org/apache/cassandra/tools/NodeTool.java | 1 + .../tools/nodetool/AutoRepairStatus.java | 70 ++++++++++++ .../tools/nodetool/GetAutoRepairConfig.java | 4 +- .../autorepair/AutoRepairConfigTest.java | 8 +- .../AutoRepairParameterizedTest.java | 7 ++ .../autorepair/AutoRepairUtilsTest.java | 17 ++- .../service/AutoRepairServiceTest.java | 51 ++++++++- .../tools/nodetool/AutoRepairStatusTest.java | 102 ++++++++++++++++++ 14 files changed, 304 insertions(+), 46 deletions(-) create mode 100644 src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java create mode 100644 test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index cef97595cffc..73a929ea0d45 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -37,6 +37,7 @@ import 
org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Pair; import org.slf4j.Logger; @@ -125,6 +126,10 @@ public void setup() // repairAsync runs a repair session of the given type asynchronously. public void repairAsync(AutoRepairConfig.RepairType repairType, long millisToWait) { + if (!AutoRepairService.instance.getAutoRepairConfig().isAutoRepairEnabled(repairType)) + { + throw new ConfigurationException("Auto-repair is disabled for repair type " + repairType); + } repairExecutors.get(repairType).submit(() -> repair(repairType, millisToWait)); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index b43262e9a4e4..09e49d3d6cfc 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -230,26 +230,26 @@ public void setRepairPrimaryTokenRangeOnly(RepairType repairType, boolean primar repair_type_overrides.get(repairType).repair_primary_token_range_only = primaryTokenRangeOnly; } - public int getParallelRepairPercentageInGroup(RepairType repairType) + public int getParallelRepairPercentage(RepairType repairType) { return applyOverrides(repairType, opt -> opt.parallel_repair_percentage); } - public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup) + public void setParallelRepairPercentage(RepairType repairType, int percentage) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).parallel_repair_percentage = percentageInGroup; + repair_type_overrides.get(repairType).parallel_repair_percentage = percentage; } - public int getParallelRepairCountInGroup(RepairType repairType) + public int getParallelRepairCount(RepairType repairType) { return applyOverrides(repairType, opt 
-> opt.parallel_repair_count); } - public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup) + public void setParallelRepairCount(RepairType repairType, int count) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).parallel_repair_count = countInGroup; + repair_type_overrides.get(repairType).parallel_repair_count = count; } public boolean getMVRepairEnabled(RepairType repairType) @@ -305,9 +305,6 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT // All fields can be modified dynamically. public static class Options implements Serializable { - // The separator separating different DCs in repair_dc_groups - public static final String DC_GROUP_SEPARATOR = "\\|"; - // defaultOptions defines the default auto-repair behavior when no overrides are defined @VisibleForTesting protected static final Options defaultOptions = getDefaultOptions(); @@ -414,8 +411,8 @@ public String toString() ", repair_by_keyspace=" + repair_by_keyspace + ", number_of_subranges=" + number_of_subranges + ", number_of_repair_threads=" + number_of_repair_threads + - ", parallel_repair_count_in_group=" + parallel_repair_count + - ", parallel_repair_percentage_in_group=" + parallel_repair_percentage + + ", parallel_repair_count=" + parallel_repair_count + + ", parallel_repair_percentage=" + parallel_repair_percentage + ", sstable_upper_threshold=" + sstable_upper_threshold + ", min_repair_interval=" + min_repair_interval + ", ignore_dcs=" + ignore_dcs + diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index b74157244ce1..007a96e1d6d6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -298,7 +298,7 @@ public String toString() } @VisibleForTesting - public static List getAutoRepairHistoryByGroupID(RepairType 
repairType) + public static List getAutoRepairHistory(RepairType repairType) { UntypedResultSet repairHistoryResult; @@ -329,11 +329,6 @@ public static List getAutoRepairHistoryByGroupID(RepairType r return null; } - public static List getAutoRepairHistoryForLocalGroup(RepairType repairType) - { - return getAutoRepairHistoryByGroupID(repairType); - } - // A host may add itself in delete hosts for some other hosts due to restart or some temp gossip issue. If a node's record // delete_hosts is not growing for more than 2 hours, we consider it as a normal node so we clear the delete_hosts for that node public static void clearDeleteHosts(RepairType repairType, UUID hostId) @@ -376,7 +371,7 @@ public static void setForceRepair(RepairType repairType, UUID hostId) public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) { - List autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + List autoRepairHistories = getAutoRepairHistory(repairType); return getCurrentRepairStatus(repairType, autoRepairHistories); } @@ -447,7 +442,7 @@ public static TreeSet getHostIdsInCurrentRing(RepairType repairType) // This function will return the host ID for the node which has not been repaired for longest time public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) { - List autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + List autoRepairHistories = getAutoRepairHistory(repairType); return getHostWithLongestUnrepairTime(autoRepairHistories); } @@ -470,16 +465,16 @@ private static AutoRepairHistory getHostWithLongestUnrepairTime(List hostIdsInCurrentRing = getHostIdsInCurrentRing(repairType, allNodesInRing); logger.info("Total nodes qualified for repair {}", hostIdsInCurrentRing.size()); - List autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + List autoRepairHistories = getAutoRepairHistory(repairType); Set autoRepairHistoryIds = new HashSet<>(); // 1. 
Remove any node that is not part of group based on goissip info @@ -557,7 +552,7 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepairInGroup(repairType, + int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, autoRepairHistories == null ? 0 : autoRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); @@ -572,7 +567,7 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) else { // try to fetch again - autoRepairHistories = getAutoRepairHistoryForLocalGroup(repairType); + autoRepairHistories = getAutoRepairHistory(repairType); currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); if (autoRepairHistories == null || currentRepairStatus == null) { @@ -636,7 +631,7 @@ else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) static void deleteAutoRepairHistory(RepairType repairType, UUID hostId) { - //delete the given hostId from current local group + //delete the given hostId delStatementRepairHistory.execute(QueryState.forInternalCalls(), QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 3d59207a8c28..c0b305f2974d 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -26,7 +26,10 @@ import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.utils.MBeanWrapper; +import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.UUID; import com.google.common.annotations.VisibleForTesting; @@ -161,13 +164,13 @@ public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryToken @Override public void 
setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup) { - config.setParallelRepairPercentageInGroup(repairType, percentageInGroup); + config.setParallelRepairPercentage(repairType, percentageInGroup); } @Override public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup) { - config.setParallelRepairCountInGroup(repairType, countInGroup); + config.setParallelRepairCount(repairType, countInGroup); } public void setMVRepairEnabled(RepairType repairType, boolean enabled) @@ -179,4 +182,25 @@ public void setRepairSessionTimeout(RepairType repairType, String timeout) { config.setRepairSessionTimeout(repairType, timeout); } + + @Override + public Set getOnGoingRepairHostIds(RepairType rType) + { + Set hostIds = new HashSet<>(); + List histories = AutoRepairUtils.getAutoRepairHistory(rType); + if (histories == null) + { + return null; + } + AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(rType)); + for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) + { + hostIds.add(id.toString()); + } + for (UUID id : currentRepairStatus.hostIdsWithOnGoingForceRepair) + { + hostIds.add(id.toString()); + } + return hostIds; + } } diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 4ebf118e023e..5e62e5630043 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -45,15 +45,19 @@ public interface AutoRepairServiceMBean public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); public void setAutoRepairMaxRetriesCount(int retries); + public void setAutoRepairRetryBackoff(String interval); + public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); public void 
setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); + public void setIgnoreDCs(RepairType repairType, Set ignorDCs); public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly); public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup); + public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup); public void setMVRepairEnabled(RepairType repairType, boolean enabled); @@ -61,4 +65,6 @@ public interface AutoRepairServiceMBean public AutoRepairConfig getAutoRepairConfig(); public void setRepairSessionTimeout(RepairType repairType, String timeout); + + public Set getOnGoingRepairHostIds(RepairType rType); } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index c10bd5299a24..4a1d6bdecb07 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2663,6 +2663,11 @@ public void setRepairSessionTimeout(AutoRepairConfig.RepairType repairType, Stri { autoRepairProxy.setRepairSessionTimeout(repairType, timeout); } + + public Set getOnGoingRepairHostIds(AutoRepairConfig.RepairType type) + { + return autoRepairProxy.getOnGoingRepairHostIds(type); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index ce0fc7ef225e..3e025df1652e 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -94,6 +94,7 @@ public int execute(String... 
args) AbortBootstrap.class, AlterTopology.class, Assassinate.class, + AutoRepairStatus.class, CassHelp.class, CIDRFilteringStats.class, Cleanup.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java new file mode 100644 index 000000000000..b29095572a2f --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; +import java.util.Set; + +import com.google.common.annotations.VisibleForTesting; + +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; +import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; + +import static com.google.common.base.Preconditions.checkArgument; + +@Command(name = "autorepairstatus", description = "Print autorepair status") +public class AutoRepairStatus extends NodeTool.NodeToolCmd +{ + @VisibleForTesting + @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") + protected AutoRepairConfig.RepairType repairType; + + @Override + public void execute(NodeProbe probe) + { + checkArgument(repairType != null, "--repair-type is required."); + PrintStream out = probe.output().out; + TableBuilder table = new TableBuilder(); + table.add("Active Repairs"); + Set ongoingRepairHostIds = probe.getOnGoingRepairHostIds(repairType); + table.add(getSetString(ongoingRepairHostIds)); + table.printTo(out); + } + + private String getSetString(Set hostIds) + { + if (hostIds.isEmpty()) + { + return "EMPTY"; + } + StringBuilder sb = new StringBuilder(); + for (String id : hostIds) + { + sb.append(id); + sb.append(","); + } + // remove last "," + sb.setLength(Math.max(sb.length() - 1, 0)); + return sb.toString(); + } +} diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 30dd1ff889c4..c62063b13b7f 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -70,8 +70,8 @@ private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, Au 
sb.append("\n\ttable max repair time in sec: " + config.getAutoRepairTableMaxRepairTime(repairType)); sb.append("\n\tignore datacenters: " + Joiner.on(',').skipNulls().join(config.getIgnoreDCs(repairType))); sb.append("\n\trepair primary token-range: " + config.getRepairPrimaryTokenRangeOnly(repairType)); - sb.append("\n\tnumber of parallel repairs within group: " + config.getParallelRepairCountInGroup(repairType)); - sb.append("\n\tpercentage of parallel repairs within group: " + config.getParallelRepairPercentageInGroup(repairType)); + sb.append("\n\tnumber of parallel repairs within group: " + config.getParallelRepairCount(repairType)); + sb.append("\n\tpercentage of parallel repairs within group: " + config.getParallelRepairPercentage(repairType)); sb.append("\n\tmv repair enabled: " + config.getMVRepairEnabled(repairType)); sb.append("\n\tinitial scheduler delay: " + config.getInitialSchedulerDelay(repairType)); sb.append("\n\trepair setssion timeout: " + config.getRepairSessionTimeout(repairType)); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index da7154077bc2..163870df6120 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -285,7 +285,7 @@ public void testGetParallelRepairPercentageInGroup() { config.global_settings.parallel_repair_percentage = 5; - int result = config.getParallelRepairPercentageInGroup(repairType); + int result = config.getParallelRepairPercentage(repairType); assertEquals(5, result); } @@ -293,7 +293,7 @@ public void testGetParallelRepairPercentageInGroup() @Test public void testSetParallelRepairPercentageInGroup() { - config.setParallelRepairPercentageInGroup(repairType, 5); + config.setParallelRepairPercentage(repairType, 5); assert 
config.repair_type_overrides.get(repairType).parallel_repair_percentage == 5; } @@ -303,7 +303,7 @@ public void testGetParallelRepairCountInGroup() { config.global_settings.parallel_repair_count = 5; - int result = config.getParallelRepairCountInGroup(repairType); + int result = config.getParallelRepairCount(repairType); assertEquals(5, result); } @@ -311,7 +311,7 @@ public void testGetParallelRepairCountInGroup() @Test public void testSetParallelRepairCountInGroup() { - config.setParallelRepairCountInGroup(repairType, 5); + config.setParallelRepairCount(repairType, 5); assert config.repair_type_overrides.get(repairType).parallel_repair_count == 5; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 21ee14bfb079..445e31770158 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -227,6 +227,13 @@ private void executeCQL() .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); } + @Test(expected = ConfigurationException.class) + public void testRepairAsyncWithRepairTypeDisabled() + { + AutoRepairService.instance.getAutoRepairConfig().setAutoRepairEnabled(repairType, false); + + AutoRepair.instance.repairAsync(repairType, 60); + } @Test public void testRepairAsync() diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index e529be786bb5..1fb039b4bfdb 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -174,8 +174,7 @@ public void testGetAutoRepairHistoryForLocalGroup() SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), 
hostId)); - List history = AutoRepairUtils.getAutoRepairHistoryForLocalGroup(repairType); - + List history = AutoRepairUtils.getAutoRepairHistory(repairType); assertNotNull(history); assertEquals(1, history.size()); assertEquals(hostId, history.get(0).hostId); @@ -184,7 +183,7 @@ public void testGetAutoRepairHistoryForLocalGroup() @Test public void testGetAutoRepairHistoryForLocalGroup_empty_history() { - List history = AutoRepairUtils.getAutoRepairHistoryForLocalGroup(repairType); + List history = AutoRepairUtils.getAutoRepairHistory(repairType); assertNull(history); } @@ -273,9 +272,9 @@ public void testGetHostWithLongestUnrepairTime() @Test public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() { - DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCountInGroup(repairType, 2); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); - int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepairInGroup(repairType, 0); + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 0); assertEquals(2, count); } @@ -284,11 +283,11 @@ public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() @Test public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() { - DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCountInGroup(repairType, 2); - DatabaseDescriptor.getAutoRepairConfig().setParallelRepairPercentageInGroup(repairType, 50); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairPercentage(repairType, 50); - int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepairInGroup(repairType, 10); + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 10); assertEquals(5, count); } @@ -467,7 +466,7 @@ public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() { UUID myID = UUID.randomUUID(); UUID otherID = UUID.randomUUID(); - 
DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCountInGroup(repairType, 5); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 5); long currentMillis = System.currentTimeMillis(); // finish time less than start time means that repair is ongoing AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java index 1f12ad08ab65..e3c9c111db61 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java @@ -50,6 +50,7 @@ import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.junit.Assert.assertEquals; @RunWith(Suite.class) @@ -170,6 +171,52 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() } } + @RunWith(Parameterized.class) + public static class RepairTypeTests extends CQLTester + { + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + private final UUID host1 = UUID.fromString("00000000-0000-0000-0000-000000000001"); + private final UUID host2 = UUID.fromString("00000000-0000-0000-0000-000000000002"); + + private AutoRepairService instance; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + + @BeforeClass + public static void setupClass() throws Exception + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + setAutoRepairEnabled(true); + requireNetwork(); + } + + @Before + public void setUpTest() + { + AutoRepairUtils.setup(); + instance = new AutoRepairService(); + } + + @Test + public void testGetOnGoingRepairHostIds() + { + 
long now = System.currentTimeMillis(); + AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); + AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); + + Set hosts = instance.getOnGoingRepairHostIds(repairType); + + assertEquals(ImmutableSet.of(host1.toString(), host2.toString()), hosts); + } + } + @RunWith(Parameterized.class) public static class SetterTests extends CQLTester { @@ -197,8 +244,8 @@ public static Collection testCases() forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentageInGroup, config::getParallelRepairPercentageInGroup), - forEachRepairType(700, AutoRepairService.instance::setParallelRepairCountInGroup, config::getParallelRepairCountInGroup), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentageInGroup, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCountInGroup, config::getParallelRepairCount), forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, SetterTests::isLocalHostForceRepair) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java new file mode 100644 index 000000000000..87ab1a2f02c7 --- 
/dev/null +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Collection; + +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.Output; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; + +@RunWith(Parameterized.class) +public class AutoRepairStatusTest +{ + @Mock + private static NodeProbe probe; + + private ByteArrayOutputStream cmdOutput; + + @Mock + private static AutoRepairConfig config; + + private static AutoRepairStatus cmd; + + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection 
repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + cmdOutput = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(cmdOutput); + when(probe.output()).thenReturn(new Output(out, out)); + when(probe.getAutoRepairConfig()).thenReturn(config); + cmd = new AutoRepairStatus(); + } + + @Test(expected = IllegalArgumentException.class) + public void testExecuteWithoutRepairType() + { + cmd.repairType = null; + cmd.execute(probe); + } + + @Test + public void testExecuteWithNoNodes() + { + cmd.repairType = repairType; + + cmd.execute(probe); + assertEquals("Active Repairs\n" + + "EMPTY \n", cmdOutput.toString()); + } + + @Test + public void testExecute() + { + when(probe.getOnGoingRepairHostIds(repairType)).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); + cmd.repairType = repairType; + + cmd.execute(probe); + + assertEquals("Active Repairs \n" + + "host1,host2,host3,host4\n", cmdOutput.toString()); + } +} From 713c84a2ebab357abed8278a85e8bc4a511ae63f Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 27 Sep 2024 16:15:30 -0700 Subject: [PATCH 025/257] Adjust a couple of default configuration values --- .../apache/cassandra/repair/autorepair/AutoRepairConfig.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 09e49d3d6cfc..ac9394d9cfe1 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -49,7 +49,7 @@ public class AutoRepairConfig implements Serializable // the maximum number of retries for a repair session. public volatile Integer repair_max_retries = 3; // the backoff time in seconds for retrying a repair session. 
- public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("60s"); + public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); // global_settings overides Options.defaultOptions for all repair types public volatile Options global_settings; @@ -332,7 +332,7 @@ protected static Options getDefaultOptions() opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = false; opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); - opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("15m"); // 15 minutes + opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours return opts; From a3bae804161f40b8cc0039a9882cdd3a480fa704 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 27 Sep 2024 16:31:43 -0700 Subject: [PATCH 026/257] Rename EMPTY --> NONE for Nodetool autorepairstatus command --- .../org/apache/cassandra/tools/nodetool/AutoRepairStatus.java | 2 +- .../apache/cassandra/tools/nodetool/AutoRepairStatusTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java index b29095572a2f..882f2a4d3cda 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -55,7 +55,7 @@ private String getSetString(Set hostIds) { if (hostIds.isEmpty()) { - return "EMPTY"; + return "NONE"; } StringBuilder sb = new StringBuilder(); for (String id : hostIds) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index 87ab1a2f02c7..a92f9ec960fe 100644 --- 
a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -85,7 +85,7 @@ public void testExecuteWithNoNodes() cmd.execute(probe); assertEquals("Active Repairs\n" + - "EMPTY \n", cmdOutput.toString()); + "NONE \n", cmdOutput.toString()); } @Test From 36667f279713caa3f954d748d895cbbcef2f7460 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 27 Sep 2024 17:08:45 -0700 Subject: [PATCH 027/257] Minor code refactoring to simplify the repair API by removing the SLEEP_IF_REPAIR_FINISHES_QUICKLY --- .../repair/autorepair/AutoRepair.java | 22 ++++--- .../autorepair/AutoRepairConfigTest.java | 1 + .../autorepair/AutoRepairKeyspaceTest.java | 2 + .../AutoRepairParameterizedTest.java | 62 ++++++++++--------- .../autorepair/AutoRepairStateTest.java | 1 + .../repair/autorepair/AutoRepairTest.java | 2 + .../autorepair/AutoRepairUtilsTest.java | 2 + 7 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 73a929ea0d45..1df4ad668e96 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -37,6 +37,7 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Pair; @@ -71,6 +72,9 @@ public class AutoRepair public static AutoRepair instance = new AutoRepair(); + // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra nodes with tiny amount of data. 
+ public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); + @VisibleForTesting protected final Map repairExecutors; @@ -116,7 +120,7 @@ public void setup() AutoRepairService.instance.checkCanRun(repairType); repairExecutors.get(repairType).scheduleWithFixedDelay( - () -> repair(repairType, 5000), + () -> repair(repairType), config.getInitialSchedulerDelay(repairType).toSeconds(), config.getRepairCheckInterval().toSeconds(), TimeUnit.SECONDS); @@ -124,17 +128,17 @@ public void setup() } // repairAsync runs a repair session of the given type asynchronously. - public void repairAsync(AutoRepairConfig.RepairType repairType, long millisToWait) + public void repairAsync(AutoRepairConfig.RepairType repairType) { if (!AutoRepairService.instance.getAutoRepairConfig().isAutoRepairEnabled(repairType)) { throw new ConfigurationException("Auto-repair is disabled for repair type " + repairType); } - repairExecutors.get(repairType).submit(() -> repair(repairType, millisToWait)); + repairExecutors.get(repairType).submit(() -> repair(repairType)); } // repair runs a repair session of the given type synchronously. 
- public void repair(AutoRepairConfig.RepairType repairType, long millisToWait) + public void repair(AutoRepairConfig.RepairType repairType) { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); if (!config.isAutoRepairEnabled(repairType)) @@ -351,7 +355,7 @@ else if (retryCount < config.getRepairMaxRetries()) } } } - cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, millisToWait, failedTokenRanges, succeededTokenRanges, skippedTokenRanges); + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, failedTokenRanges, succeededTokenRanges, skippedTokenRanges); } else { @@ -410,7 +414,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon } private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, - long startTime, long millisToWait, int failedTokenRanges, int succeededTokenRanges, int skippedTokenRanges) throws InterruptedException + long startTime, int failedTokenRanges, int succeededTokenRanges, int skippedTokenRanges) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) @@ -437,12 +441,12 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec())); } repairState.setLastRepairTime(timeFunc.get()); - if (timeInHours == 0 && millisToWait > 0) + if (timeInHours == 0 && SLEEP_IF_REPAIR_FINISHES_QUICKLY.toSeconds() > 0) { //If repair finished quickly, happens for an empty instance, in such case //wait for some duration so that the JMX metrics can detect the repairInProgress - logger.info("Wait for {} milliseconds for repair type {}.", millisToWait, repairType); - Thread.sleep(millisToWait); + logger.info("Wait for {} for repair type {}.", SLEEP_IF_REPAIR_FINISHES_QUICKLY, repairType); + Thread.sleep(SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds()); } 
repairState.setRepairInProgress(false); AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 163870df6120..c42959eec32b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -62,6 +62,7 @@ public void setUp() { config = new AutoRepairConfig(true); config.repair_type_overrides = null; + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 579b996a12f5..43b8e4a5f4e8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -27,6 +27,7 @@ import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.TableMetadata; @@ -43,6 +44,7 @@ public class AutoRepairKeyspaceTest public static void setupDatabaseDescriptor() { DatabaseDescriptor.daemonInitialization(); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 445e31770158..a435688c3965 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -31,6 +31,7 @@ import 
com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.statements.schema.TableAttributes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -119,6 +120,7 @@ public static Collection repairTypes() @BeforeClass public static void setupClass() throws Exception { + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); setAutoRepairEnabled(true); requireNetwork(); @@ -232,7 +234,7 @@ public void testRepairAsyncWithRepairTypeDisabled() { AutoRepairService.instance.getAutoRepairConfig().setAutoRepairEnabled(repairType, false); - AutoRepair.instance.repairAsync(repairType, 60); + AutoRepair.instance.repairAsync(repairType); } @Test @@ -240,7 +242,7 @@ public void testRepairAsync() { AutoRepair.instance.repairExecutors.put(repairType, mockExecutor); - AutoRepair.instance.repairAsync(repairType, 60); + AutoRepair.instance.repairAsync(repairType); verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); @@ -257,7 +259,7 @@ public void testRepairTurn() public void testRepair() { AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); long lastRepairTime = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); @@ -272,7 +274,7 @@ public void testTooFrequentRepairs() AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); //in the first round let repair run config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType, 0); + 
AutoRepair.instance.repair(repairType); long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), @@ -280,7 +282,7 @@ public void testTooFrequentRepairs() //if repair was done in last 24 hours then it should not trigger another repair config.setRepairMinInterval(repairType, "24h"); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertEquals(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); @@ -295,14 +297,14 @@ public void testNonFrequentRepairs() AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", lastRepairTime1), lastRepairTime1 > 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected repair time to be same, actual value 
lastRepairTime1 %d, " + "lastRepairTime2 ", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); @@ -322,9 +324,9 @@ public void testGetPriorityHosts() UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); Assert.assertSame(String.format("Priority host count is not same actual value %d, expected value %d", AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); @@ -341,14 +343,14 @@ public void testCheckAutoRepairStartStop() throws Throwable config.setRepairMinInterval(repairType, "0s"); config.setAutoRepairEnabled(repairType, false); long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); //Since repair has not happened, both the last repair times should be same Assert.assertEquals(String.format("Expected lastRepairTime1 %d, and lastRepairTime2 %d to be same", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); config.setAutoRepairEnabled(repairType, true); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); //since repair is done now, so lastRepairTime1/lastRepairTime2 and lastRepairTime3 should not be same long lastRepairTime3 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); 
Assert.assertNotSame(String.format("Expected lastRepairTime1 %d, and lastRepairTime3 %d to be not same", @@ -387,19 +389,19 @@ public void testMVRepair() config.setMVRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, false); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, true); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); } @@ -438,7 +440,7 @@ public void testSkipRepairSSTableCountHigherThreshold() assertEquals(0, state.getSkippedTokenRangesCount()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); state.setLastRepairTime(0); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); assertEquals(1, 
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); // skipping one time for the base table and another time for MV table @@ -449,7 +451,7 @@ public void testSkipRepairSSTableCountHigherThreshold() config.setRepairSSTableCountHigherThreshold(repairType, 11); config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); state.setLastRepairTime(0); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); assertEquals(0, state.getSkippedTokenRangesCount()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); @@ -480,7 +482,7 @@ public void testMetrics() }; AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); @@ -497,7 +499,7 @@ public void testMetrics() when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); assertTrue(AutoRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); @@ -529,9 +531,9 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws return null; }).when(autoRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); - AutoRepair.instance.repair(repairType, 0); - 
AutoRepair.instance.repair(repairType, 0); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); } @Test @@ -545,7 +547,7 @@ public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); int disabledTablesRepairCountBefore = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), consideredTables, 0); @@ -618,7 +620,7 @@ else if (list.get(0) instanceof String) AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, shuffleKeyspacesCall.get()); assertEquals(4, shuffleTablesCall.get()); @@ -634,7 +636,7 @@ public void testRepairTakesLastRepairTimeFromDB() AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); config.setRepairMinInterval(repairType, "1h"); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); // repair scheduler should not attempt to run repair as last repair time in DB is current time - 1s assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); @@ -657,7 +659,7 @@ public void testRepairMaxRetries() config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.put(repairType, autoRepairState); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); 
//system_auth.role_permissions,system_auth.network_permissions,system_auth.role_members,system_auth.roles, // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, @@ -690,7 +692,7 @@ public void testRepairSuccessAfterRetry() }); config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.put(repairType, autoRepairState); - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(14); @@ -708,7 +710,7 @@ public void testRepairThrowsForIRWithMVReplay() { try { - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); fail("Expected ConfigurationException"); } catch (ConfigurationException ignored) @@ -717,7 +719,7 @@ public void testRepairThrowsForIRWithMVReplay() } else { - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); } } @@ -732,7 +734,7 @@ public void testRepairThrowsForIRWithCDCReplay() { try { - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); fail("Expected ConfigurationException"); } catch (ConfigurationException ignored) @@ -741,7 +743,7 @@ public void testRepairThrowsForIRWithCDCReplay() } else { - AutoRepair.instance.repair(repairType, 0); + AutoRepair.instance.repair(repairType); } } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 657b11310f64..f0974dd83c1a 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -68,6 +68,7 @@ public static Collection repairTypes() @Before public void setUp() { + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); initMocks(this); 
createTable(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int)", KEYSPACE, testTable)); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 3592186365d1..6137983b3ea7 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -27,6 +27,7 @@ import org.junit.Assert; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.exceptions.ConfigurationException; @@ -51,6 +52,7 @@ public static void setupClass() throws Exception @Before public void setup() { + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); System.clearProperty("cassandra.streaming.requires_cdc_replay"); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 1fb039b4bfdb..1e9758f53240 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -31,6 +31,7 @@ import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.marshal.UTF8Type; @@ -103,6 +104,7 @@ public static void setupClass() throws Exception @Before public void setup() { + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); MockitoAnnotations.initMocks(this); 
QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", From 8d65b2d59e748ba3cd1e01d68c59050b9c82dbc1 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 29 Sep 2024 21:53:26 -0700 Subject: [PATCH 028/257] Adjust AutoRepair on 5.0 --- .../config/CassandraRelevantProperties.java | 18 +- .../cassandra/metrics/AutoRepairMetrics.java | 1 + .../metrics/CassandraMetricsRegistry.java | 1 + .../repair/autorepair/AutoRepair.java | 1 - .../repair/autorepair/AutoRepairKeyspace.java | 4 +- .../schema/SystemDistributedKeyspace.java | 5 +- .../cassandra/service/AutoRepairService.java | 2 +- .../tcm/transformations/cms/Initialize.java | 1 + .../test/repair/AutoRepairSchedulerTest.java | 7 +- .../org/apache/cassandra/cql3/CQLTester.java | 2 + .../streaming/CassandraSreamReceiverTest.java | 11 +- .../AutoRepairParameterizedTest.java | 84 ++--- .../repair/autorepair/AutoRepairTest.java | 6 +- .../autorepair/AutoRepairUtilsTest.java | 18 +- .../service/AutoRepairServiceBasicTest.java | 126 +++++++ .../AutoRepairServiceRepairTypeTest.java | 79 +++++ .../service/AutoRepairServiceSetterTest.java | 132 ++++++++ .../service/AutoRepairServiceTest.java | 311 ------------------ 18 files changed, 405 insertions(+), 404 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java delete mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 6b800d10d8fe..e44ba180f146 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -550,7 +550,14 @@ public enum 
CassandraRelevantProperties STORAGE_HOOK("cassandra.storage_hook"), STORAGE_PORT("cassandra.storage_port"), STREAMING_HISTOGRAM_ROUND_SECONDS("cassandra.streaminghistogram.roundseconds", "60"), + + /** + * If set to true, mutations streamed during anti-entropy repair will be replayed via the regular write path for associated views. + */ + STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR("cassandra.streaming.requires_view_build_during_repair", "true"), + STREAMING_SESSION_PARALLELTRANSFERS("cassandra.streaming.session.parallelTransfers"), + STREAM_HOOK("cassandra.stream_hook"), /** Platform word size sun.arch.data.model. Examples: "32", "64", "unknown"*/ SUN_ARCH_DATA_MODEL("sun.arch.data.model"), @@ -674,17 +681,6 @@ public enum CassandraRelevantProperties /** Gossiper compute expiration timeout. Default value 3 days. */ VERY_LONG_TIME_MS("cassandra.very_long_time_ms", "259200000"), WAIT_FOR_TRACING_EVENTS_TIMEOUT_SECS("cassandra.wait_for_tracing_events_timeout_secs", "0"), - /** - * If set to true, mutations streamed during anti-entropy repair will be replayed via the regular write path for associated views. - */ - STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR("cassandra.streaming.requires_view_build_during_repair", "true"), - - /** - * If set to true, streamed mutations via the regular write path for CDC. 
- * Deprecate this property in trunk (or 5.0) as a new config has been added to control this https://issues.apache.org/jira/browse/CASSANDRA-17666 - */ - STREAMING_REQUIRES_CDC_REPLAY("cassandra.streaming.requires_cdc_replay", "true"), - ; static diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index d765899631b7..dedd8f170170 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -31,6 +31,7 @@ */ public class AutoRepairMetrics { + public static final String TYPE_NAME = "autorepair"; public Gauge repairsInProgress; public Gauge nodeRepairTimeInSec; public Gauge clusterRepairTimeInSec; diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index b11474875916..11b36c606a3a 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -154,6 +154,7 @@ public class CassandraMetricsRegistry extends MetricRegistry .add(ThreadPoolMetrics.TYPE_NAME) .add(TrieMemtableMetricsView.TYPE_NAME) .add(UnweightedCacheMetrics.TYPE_NAME) + .add(AutoRepairMetrics.TYPE_NAME) .build(); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1df4ad668e96..d59e4cfea643 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -50,7 +50,6 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; import 
org.apache.cassandra.service.AutoRepairService; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java index 14cba17f6095..5afd2fff3275 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java @@ -45,7 +45,7 @@ private AutoRepairKeyspace() public static final String AUTO_REPAIR_PRIORITY = "auto_repair_priority"; - private static final TableMetadata AutoRepairHistory = + public static final TableMetadata AutoRepairHistory = parse(AUTO_REPAIR_HISTORY, "Auto repair history for each node", "CREATE TABLE %s (" @@ -59,7 +59,7 @@ private AutoRepairKeyspace() + "force_repair boolean," + "PRIMARY KEY (repair_type, host_id))"); - private static final TableMetadata AutoRepairPriority = + public static final TableMetadata AutoRepairPriority = parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", "CREATE TABLE %s (" diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index 2ec600a13d97..c2cd0540af68 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -56,6 +56,9 @@ import org.apache.cassandra.utils.TimeUUID; import static java.lang.String.format; + +import static org.apache.cassandra.repair.autorepair.AutoRepairKeyspace.AutoRepairHistory; +import static org.apache.cassandra.repair.autorepair.AutoRepairKeyspace.AutoRepairPriority; import static org.apache.cassandra.utils.ByteBufferUtil.bytes; public final class SystemDistributedKeyspace @@ -169,7 +172,7 @@ public static KeyspaceMetadata metadata() { return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(Math.max(DEFAULT_RF, DatabaseDescriptor.getDefaultKeyspaceRF())), 
- Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable)); + Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, AutoRepairHistory, AutoRepairPriority)); } public static void startParentRepair(TimeUUID parent_id, String keyspaceName, String[] cfnames, RepairOption options) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index c0b305f2974d..1adad42b263d 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -68,7 +68,7 @@ public void checkCanRun(RepairType repairType) if (CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.getBoolean()) throw new ConfigurationException("Cannot run incremental repair while materialized view replay is enabled."); - if (CassandraRelevantProperties.STREAMING_REQUIRES_CDC_REPLAY.getBoolean()) + if (DatabaseDescriptor.isCDCOnRepairEnabled()) throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled."); } diff --git a/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java b/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java index 11c2ed4d31af..635a7942260b 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java +++ b/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java @@ -24,6 +24,7 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.MetaStrategy; +import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.SystemDistributedKeyspace; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java 
b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 49b4300dc50b..5a49f669dc34 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -21,10 +21,12 @@ import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.UUID; import java.util.concurrent.TimeUnit; import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.cassandra.config.DatabaseDescriptor; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -48,6 +50,8 @@ public class AutoRepairSchedulerTest extends TestBaseImpl @BeforeClass public static void init() throws IOException { + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + // Define the expected date format pattern String pattern = "EEE MMM dd HH:mm:ss z yyyy"; // Create SimpleDateFormat object with the given pattern @@ -88,6 +92,7 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { + DatabaseDescriptor.setCDCOnRepairEnabled(false); AutoRepair.instance.setup(); } catch (Exception e) @@ -112,7 +117,7 @@ private void validate(String repairType) throws ParseException // repair_type Assert.assertEquals(repairType, row[0].toString()); // host_id - Assert.assertEquals(String.format("00000000-0000-4000-8000-%012d", node + 1), row[1].toString()); + UUID.fromString(row[1].toString()); // ensure there is a legit repair_start_ts and repair_finish_ts sdf.parse(row[2].toString()); sdf.parse(row[3].toString()); diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 91d86d2c811b..3492eeba23fa 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ 
b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -206,6 +206,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_CONNECTION_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_READ_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_RANDOM_SEED; @@ -459,6 +460,7 @@ protected static void prePrepareServer() StorageService.instance.registerMBeans(); StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); SnapshotManager.instance.registerMBean(); + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); } // So derived classes can get enough intialization to start setting DatabaseDescriptor options diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java index 0c3eeb37b012..76aded0e0476 100644 --- a/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java @@ -20,6 +20,7 @@ import java.util.Collections; +import org.apache.cassandra.config.DatabaseDescriptor; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -50,7 +51,7 @@ public class CassandraSreamReceiverTest extends CQLTester @BeforeClass public static void beforeClass() { - System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + DatabaseDescriptor.setCDCOnRepairEnabled(false); } @Before @@ -109,15 +110,11 @@ public void testRequiresWritePathRepairMVOnly() @Test public void testRequiresWritePathRepairCDCWithSystemProp() { - System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); - ColumnFamilyStore cfs 
= Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); - + DatabaseDescriptor.setCDCOnRepairEnabled(true); assertTrue(receiver.requiresWritePath(cfs)); - - System.clearProperty("cassandra.streaming.requires_cdc_replay"); } @Test @@ -128,8 +125,8 @@ public void testDoesNotRequiresWritePathRepairCDCOnly() CassandraStreamReceiver receiver1 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); assertFalse(receiver1.requiresWritePath(cfs)); - System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); CassandraStreamReceiver receiver2 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + DatabaseDescriptor.setCDCOnRepairEnabled(true); assertTrue(receiver2.requiresWritePath(cfs)); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a435688c3965..020116a0bc7d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -28,7 +28,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; import org.apache.cassandra.config.DurationSpec; @@ -37,8 +36,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.schema.AutoRepairParams; -import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Pair; @@ -50,21 +47,15 @@ import org.junit.runner.RunWith; import 
org.junit.runners.Parameterized; -import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.db.marshal.IntegerType; -import org.apache.cassandra.db.marshal.UTF8Type; -import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.metrics.AutoRepairMetricsManager; import org.apache.cassandra.metrics.AutoRepairMetrics; -import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.AutoRepairService; @@ -120,48 +111,26 @@ public static Collection repairTypes() @BeforeClass public static void setupClass() throws Exception { - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); setAutoRepairEnabled(true); requireNetwork(); AutoRepairUtils.setup(); - - - cfm = TableMetadata.builder(KEYSPACE, TABLE) - .addPartitionKeyColumn("k", UTF8Type.instance) - .addStaticColumn("s", UTF8Type.instance) - .addClusteringColumn("i", IntegerType.instance) - .addRegularColumn("v", UTF8Type.instance) - .params(TableParams.builder().automatedRepairFull(AutoRepairParams.create(AutoRepairConfig.RepairType.full, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(true)))). 
- automatedRepairIncremental(AutoRepairParams.create(AutoRepairConfig.RepairType.incremental, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(true)))).build()) - .build(); - - cfmDisabledAutoRepair = TableMetadata.builder(KEYSPACE, TABLE_DISABLED_AUTO_REPAIR) - .addPartitionKeyColumn("k", UTF8Type.instance) - .addStaticColumn("s", UTF8Type.instance) - .addClusteringColumn("i", IntegerType.instance) - .addRegularColumn("v", UTF8Type.instance) - .params(TableParams.builder().automatedRepairFull(AutoRepairParams.create(AutoRepairConfig.RepairType.full, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(false)))). - automatedRepairIncremental(AutoRepairParams.create(AutoRepairConfig.RepairType.incremental, ImmutableMap.of(AutoRepairParams.Option.ENABLED.toString(), Boolean.toString(false)))).build()) - .build(); - - SchemaLoader.prepareServer(); - SchemaLoader.createKeyspace(KEYSPACE, KeyspaceParams.simple(1), cfm, cfmDisabledAutoRepair); - cfm = Schema.instance.getTableMetadata(KEYSPACE, TABLE); - cfmDisabledAutoRepair = Schema.instance.getTableMetadata(KEYSPACE, TABLE_DISABLED_AUTO_REPAIR); - keyspace = Keyspace.open(KEYSPACE); - DatabaseDescriptor.setMaterializedViewsEnabled(true); - QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + - "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); - - DatabaseDescriptor.setMaterializedViewsEnabled(false); + StorageService.instance.doAutoRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); } @Before public void setup() { - System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text 
static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH automated_repair_full = {'enabled': 'false'} AND automated_repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); + + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); MockitoAnnotations.initMocks(this); @@ -184,13 +153,17 @@ public void setup() resetConfig(); AutoRepair.shuffleFunc = java.util.Collections::shuffle; + + keyspace = Keyspace.open(KEYSPACE); + cfm = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).metadata(); + cfmDisabledAutoRepair = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE_DISABLED_AUTO_REPAIR).metadata(); + DatabaseDescriptor.setCDCOnRepairEnabled(false); } @After public void tearDown() { System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); - System.clearProperty("cassandra.streaming.requires_cdc_replay"); } private void resetCounters() @@ -606,15 +579,18 @@ public void testRepairShufflesKeyspacesAndTables() AtomicInteger shuffleKeyspacesCall = new AtomicInteger(); AtomicInteger shuffleTablesCall = new AtomicInteger(); AutoRepair.shuffleFunc = (List list) -> { - assertTrue(list.get(0) instanceof Keyspace || list.get(0) instanceof String); - if (list.get(0) instanceof Keyspace) - { - shuffleKeyspacesCall.getAndIncrement(); - assertFalse(list.isEmpty()); - } - else if (list.get(0) instanceof String) + if (!list.isEmpty()) { - shuffleTablesCall.getAndIncrement(); + assertTrue(list.get(0) instanceof Keyspace || list.get(0) instanceof String); + if (list.get(0) instanceof Keyspace) + { + shuffleKeyspacesCall.getAndIncrement(); + assertFalse(list.isEmpty()); + } + 
else if (list.get(0) instanceof String) + { + shuffleTablesCall.getAndIncrement(); + } } }; @@ -623,7 +599,7 @@ else if (list.get(0) instanceof String) AutoRepair.instance.repair(repairType); assertEquals(1, shuffleKeyspacesCall.get()); - assertEquals(4, shuffleTablesCall.get()); + assertEquals(5, shuffleTablesCall.get()); } @Test @@ -665,7 +641,7 @@ public void testRepairMaxRetries() // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, // system_distributed.auto_repair_priority,system_distributed.repair_history,system_distributed.auto_repair_history, // system_distributed.view_build_status,system_distributed.parent_repair_history,system_distributed.partition_denylist - int exptedTablesGoingThroughRepair = 14; + int exptedTablesGoingThroughRepair = 18; assertEquals(config.getRepairMaxRetries()*exptedTablesGoingThroughRepair, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); @@ -695,7 +671,7 @@ public void testRepairSuccessAfterRetry() AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(14); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(18); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } @@ -728,7 +704,7 @@ public void testRepairThrowsForIRWithMVReplay() public void testRepairThrowsForIRWithCDCReplay() { AutoRepair.instance.setup(); - System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); + DatabaseDescriptor.setCDCOnRepairEnabled(true); if (repairType == AutoRepairConfig.RepairType.incremental) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 
6137983b3ea7..eefc4556697a 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -54,7 +54,6 @@ public void setup() { AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); - System.clearProperty("cassandra.streaming.requires_cdc_replay"); } @Test @@ -76,9 +75,11 @@ public void testSetup() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithCDCReplay() { + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); - System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); DatabaseDescriptor.setCDCEnabled(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); AutoRepair instance = new AutoRepair(); instance.setup(); @@ -89,6 +90,7 @@ public void testSetupFailsWhenIREnabledWithMVReplay() { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + DatabaseDescriptor.setCDCOnRepairEnabled(false); AutoRepair instance = new AutoRepair(); instance.setup(); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 1e9758f53240..539df156ce7a 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -23,20 +23,16 @@ import java.util.TreeSet; import java.util.UUID; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import 
org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.marshal.UUIDType; -import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Locator; @@ -46,9 +42,7 @@ import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; @@ -92,18 +86,16 @@ public static void setupClass() throws Exception defaultSnitch = DatabaseDescriptor.getLocator(); localEndpoint = FBUtilities.getBroadcastAddressAndPort(); hostId = StorageService.instance.getHostIdForEndpoint(localEndpoint); - AutoRepairUtils.setup(); - SchemaLoader.prepareServer(); - SchemaLoader.createKeyspace("ks", KeyspaceParams.create(false, - ImmutableMap.of("class", "NetworkTopologyStrategy", "datacenter1", "1")), - TableMetadata.builder("ks", "tbl") - .addPartitionKeyColumn("k", UTF8Type.instance) - .build()); + StorageService.instance.doAutoRepairSetup(); } @Before public void setup() { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", "ks")); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", "ks", "tbl")); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new 
DurationSpec.IntSecondsBound("0s"); MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format( diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java new file mode 100644 index 000000000000..826a7187219c --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.service; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; + +import static org.junit.Assert.assertEquals; + +public class AutoRepairServiceBasicTest extends CQLTester { + private static AutoRepairService autoRepairService; + private static AutoRepairConfig config; + + @Before + public void setUp() { + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + config = new AutoRepairConfig(); + autoRepairService = new AutoRepairService(); + autoRepairService.config = config; + } + + @After + public void tearDown() { + System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); + } + + @Test + public void testSetup() { + AutoRepairService.instance.config = null; + + AutoRepairService.setup(); + + assertEquals(DatabaseDescriptor.getAutoRepairConfig(), AutoRepairService.instance.config); + } + + @Test + public void testGetAutoRepairConfigReturnsConfig() { + assertEquals(config, autoRepairService.getAutoRepairConfig()); + } + + @Test + public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { + autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); + + assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); + } + + + @Test + public void testsetAutoRepairMaxRetriesCount() { + autoRepairService.setAutoRepairMaxRetriesCount(101); + + assertEquals(101, config.getRepairMaxRetries()); + } + + + @Test + public void testsetAutoRepairRetryBackoffInSec() { + autoRepairService.setAutoRepairRetryBackoff("102s"); + + 
assertEquals(102, config.getRepairRetryBackoff().toSeconds()); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { + autoRepairService.config = new AutoRepairConfig(false); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { + autoRepairService.config = new AutoRepairConfig(true); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java new file mode 100644 index 000000000000..7c8645149adc --- /dev/null +++ 
b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.UUID; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public class AutoRepairServiceRepairTypeTest extends CQLTester { + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + private final UUID host1 = UUID.fromString("00000000-0000-0000-0000-000000000001"); + private final UUID host2 = 
UUID.fromString("00000000-0000-0000-0000-000000000002"); + + private AutoRepairService instance; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + + @BeforeClass + public static void setupClass() throws Exception { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + setAutoRepairEnabled(true); + requireNetwork(); + } + + @Before + public void setUpTest() { + AutoRepairUtils.setup(); + instance = new AutoRepairService(); + } + + @Test + public void testGetOnGoingRepairHostIdsTest() { + long now = System.currentTimeMillis(); + AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); + AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); + + Set hosts = instance.getOnGoingRepairHostIds(repairType); + + assertEquals(ImmutableSet.of(host1.toString(), host2.toString()), hosts); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java new file mode 100644 index 000000000000..31aae67c557e --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.schema.SchemaConstants; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.UUID; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public class AutoRepairServiceSetterTest extends CQLTester { + private static final AutoRepairConfig config = new AutoRepairConfig(true); + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public T arg; + + @Parameterized.Parameter(2) + public BiConsumer setter; + + @Parameterized.Parameter(3) + public Function getter; + + @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") + public static Collection testCases() { + DatabaseDescriptor.setConfig(DatabaseDescriptor.loadConfig()); + return Stream.of( + forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), + forEachRepairType(100, 
AutoRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(200, AutoRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), + forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentageInGroup, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCountInGroup, config::getParallelRepairCount), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, AutoRepairServiceSetterTest::isLocalHostForceRepair) + ).flatMap(Function.identity()).collect(Collectors.toList()); + } + + private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) { + UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); + UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + + if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) { + return ImmutableSet.of(InetAddressAndPort.getLocalHost()); + } + return ImmutableSet.of(); + } + + private static Stream forEachRepairType(T arg, BiConsumer setter, Function 
getter) { + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { + testCases[repairType.ordinal()] = new Object[]{repairType, arg, setter, getter}; + } + + return Arrays.stream(testCases); + } + + @BeforeClass + public static void setup() throws Exception { + DatabaseDescriptor.daemonInitialization(); + setAutoRepairEnabled(true); + requireNetwork(); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + AutoRepairUtils.setup(); + AutoRepairService.instance.config = config; + } + + @Before + public void prepare() { + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY)); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY)); + } + + @Test + public void testSettersTest() { + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + setter.accept(repairType, arg); + assertEquals(arg, getter.apply(repairType)); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java deleted file mode 100644 index e3c9c111db61..000000000000 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceTest.java +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Set; -import java.util.UUID; -import java.util.function.BiConsumer; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import com.google.common.collect.ImmutableSet; -import org.junit.After; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Suite; - -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.cql3.UntypedResultSet; -import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.gms.Gossiper; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.utils.FBUtilities; - -import static org.apache.cassandra.Util.setAutoRepairEnabled; -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.junit.Assert.assertEquals; - -@RunWith(Suite.class) -@Suite.SuiteClasses({ AutoRepairServiceTest.BasicTests.class, AutoRepairServiceTest.SetterTests.class }) -public 
class AutoRepairServiceTest -{ - public static class BasicTests extends CQLTester - { - private static AutoRepairService autoRepairService; - private static AutoRepairConfig config; - - @Before - public void setUp() - { - System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); - DatabaseDescriptor.setMaterializedViewsEnabled(false); - DatabaseDescriptor.setCDCEnabled(false); - config = new AutoRepairConfig(); - autoRepairService = new AutoRepairService(); - autoRepairService.config = config; - } - - @After - public void tearDown() - { - System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); - System.clearProperty("cassandra.streaming.requires_cdc_replay"); - } - - @Test - public void testSetup() - { - AutoRepairService.instance.config = null; - - AutoRepairService.setup(); - - assertEquals(DatabaseDescriptor.getAutoRepairConfig(), AutoRepairService.instance.config); - } - - @Test - public void testGetAutoRepairConfigReturnsConfig() - { - assertEquals(config, autoRepairService.getAutoRepairConfig()); - } - - @Test - public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() - { - autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); - - assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); - } - - - @Test - public void testsetAutoRepairMaxRetriesCount() - { - autoRepairService.setAutoRepairMaxRetriesCount(101); - - assertEquals(101, config.getRepairMaxRetries()); - } - - - @Test - public void testsetAutoRepairRetryBackoffInSec() - { - autoRepairService.setAutoRepairRetryBackoff("102s"); - - assertEquals(102, config.getRepairRetryBackoff().toSeconds()); - } - - @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() - { - autoRepairService.config = new AutoRepairConfig(false); - - 
autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() - { - autoRepairService.config = new AutoRepairConfig(true); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); - - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() - { - autoRepairService.config = new AutoRepairConfig(true); - DatabaseDescriptor.setMaterializedViewsEnabled(true); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); - System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); - - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() - { - autoRepairService.config = new AutoRepairConfig(true); - System.setProperty("cassandra.streaming.requires_cdc_replay", "true"); - - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() - { - autoRepairService.config = new AutoRepairConfig(true); - DatabaseDescriptor.setCDCEnabled(true); - System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); - - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - } - - @RunWith(Parameterized.class) - public static class RepairTypeTests extends CQLTester - { - @Parameterized.Parameter() - public AutoRepairConfig.RepairType repairType; - - private final UUID host1 = UUID.fromString("00000000-0000-0000-0000-000000000001"); - private final UUID host2 = UUID.fromString("00000000-0000-0000-0000-000000000002"); - - private AutoRepairService 
instance; - - @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() - { - return Arrays.asList(AutoRepairConfig.RepairType.values()); - } - - - @BeforeClass - public static void setupClass() throws Exception - { - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - setAutoRepairEnabled(true); - requireNetwork(); - } - - @Before - public void setUpTest() - { - AutoRepairUtils.setup(); - instance = new AutoRepairService(); - } - - @Test - public void testGetOnGoingRepairHostIds() - { - long now = System.currentTimeMillis(); - AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); - AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); - - Set hosts = instance.getOnGoingRepairHostIds(repairType); - - assertEquals(ImmutableSet.of(host1.toString(), host2.toString()), hosts); - } - } - - @RunWith(Parameterized.class) - public static class SetterTests extends CQLTester - { - private static final AutoRepairConfig config = new AutoRepairConfig(true); - - @Parameterized.Parameter - public AutoRepairConfig.RepairType repairType; - - @Parameterized.Parameter(1) - public T arg; - - @Parameterized.Parameter(2) - public BiConsumer setter; - - @Parameterized.Parameter(3) - public Function getter; - - @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") - public static Collection testCases() - { - return Stream.of( - forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), - forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), - forEachRepairType(200, AutoRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), - forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), - forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), - 
forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentageInGroup, config::getParallelRepairPercentage), - forEachRepairType(700, AutoRepairService.instance::setParallelRepairCountInGroup, config::getParallelRepairCount), - forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, SetterTests::isLocalHostForceRepair) - ).flatMap(Function.identity()).collect(Collectors.toList()); - } - - private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) - { - UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); - UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( - "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); - - if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) - { - return ImmutableSet.of(InetAddressAndPort.getLocalHost()); - } - return ImmutableSet.of(); - } - - private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) - { - Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) - { - testCases[repairType.ordinal()] = new Object[]{ repairType, arg, setter, getter }; - } - - return Arrays.stream(testCases); - } - - @BeforeClass - public static void setup() throws Exception - { - setAutoRepairEnabled(true); - requireNetwork(); - 
DatabaseDescriptor.setMaterializedViewsEnabled(false); - DatabaseDescriptor.setCDCEnabled(false); - AutoRepairUtils.setup(); - AutoRepairService.instance.config = config; - } - - @Before - public void prepare() - { - QueryProcessor.executeInternal(String.format( - "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY)); - QueryProcessor.executeInternal(String.format( - "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY)); - } - - @Test - public void testSetters() - { - System.setProperty("cassandra.streaming.requires_cdc_replay", "false"); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); - setter.accept(repairType, arg); - assertEquals(arg, getter.apply(repairType)); - } - } -} From 55b706c5c3d67f697efcc6d76512e1f577f36488 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 30 Sep 2024 11:59:25 -0700 Subject: [PATCH 029/257] Fix unit test cases DescribeStatementTest & SchemaCQLHelperTest --- .../cassandra/cql3/statements/DescribeStatementTest.java | 8 ++++++-- .../unit/org/apache/cassandra/db/SchemaCQLHelperTest.java | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 8529d8ff8073..e16503b0172a 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1143,7 +1143,9 @@ private static String tableParametersCql() " AND read_repair = 'BLOCKING'\n" + " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = '99p';"; + " AND speculative_retry = '99p'\n" + + " AND automated_repair_full = {'enabled': 'true'}\n" + + " AND automated_repair_incremental = {'enabled': 'true'};"; } private static 
String cqlQuoted(Map map) @@ -1170,7 +1172,9 @@ private static String mvParametersCql() " AND memtable_flush_period_in_ms = 0\n" + " AND min_index_interval = 128\n" + " AND read_repair = 'BLOCKING'\n" + - " AND speculative_retry = '99p';"; + " AND speculative_retry = '99p'\n" + + " AND automated_repair_full = {'enabled': 'true'}\n" + + " AND automated_repair_incremental = {'enabled': 'true'};"; } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index da5f6d1853cb..9ff2ad417545 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -349,7 +349,9 @@ public void testCfmOptionsCQL() " AND read_repair = 'BLOCKING'\n" + " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = 'ALWAYS';" + " AND speculative_retry = 'ALWAYS'\n" + + " AND automated_repair_full = {'enabled': 'true'}\n" + + " AND automated_repair_incremental = {'enabled': 'true'};" )); } From 36442de593afea1b557e28ad8aef9c4d16d6efb9 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 30 Sep 2024 13:42:40 -0700 Subject: [PATCH 030/257] Safeguard the setup call to ensure it it done only once --- .../repair/autorepair/AutoRepair.java | 37 ++++++++++++------- .../repair/autorepair/AutoRepairTest.java | 27 ++++++++++++-- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index d59e4cfea643..8f3307e8ee07 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -90,6 +90,7 @@ public class AutoRepair protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); + private boolean 
isSetupDone = false; @VisibleForTesting protected AutoRepair() @@ -109,20 +110,30 @@ protected AutoRepair() public void setup() { - AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); - AutoRepairService.setup(); - AutoRepairUtils.setup(); - - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + // Ensure setup is done only once; this is only for unit tests + // For production, this method should be called only once. + synchronized (this) { - if (config.isAutoRepairEnabled(repairType)) - AutoRepairService.instance.checkCanRun(repairType); - - repairExecutors.get(repairType).scheduleWithFixedDelay( - () -> repair(repairType), - config.getInitialSchedulerDelay(repairType).toSeconds(), - config.getRepairCheckInterval().toSeconds(), - TimeUnit.SECONDS); + if (isSetupDone) + { + return; + } + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + AutoRepairService.setup(); + AutoRepairUtils.setup(); + + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + if (config.isAutoRepairEnabled(repairType)) + AutoRepairService.instance.checkCanRun(repairType); + + repairExecutors.get(repairType).scheduleWithFixedDelay( + () -> repair(repairType), + config.getInitialSchedulerDelay(repairType).toSeconds(), + config.getRepairCheckInterval().toSeconds(), + TimeUnit.SECONDS); + } + isSetupDone = true; } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index eefc4556697a..c72de0ecc7e6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -66,9 +66,30 @@ public void testSetup() assertEquals(RepairType.values().length, instance.repairExecutors.size()); for (RepairType repairType : instance.repairExecutors.keySet()) { - assertEquals(String.format("Expected 1 task in queue for %s", 
repairType), - 1, instance.repairExecutors.get(repairType).getPendingTaskCount() - + instance.repairExecutors.get(repairType).getActiveTaskCount()); + int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() + + instance.repairExecutors.get(repairType).getActiveTaskCount(); + assertEquals(String.format("Expected 1 task in queue for %s but was %s", repairType, expectedTasks), + 1, expectedTasks); + } + } + + @Test + public void testSafeGuardSetupCall() + { + AutoRepair instance = new AutoRepair(); + + // only one should be setup, and rest should be ignored + instance.setup(); + instance.setup(); + instance.setup(); + + assertEquals(RepairType.values().length, instance.repairExecutors.size()); + for (RepairType repairType : instance.repairExecutors.keySet()) + { + int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() + + instance.repairExecutors.get(repairType).getActiveTaskCount(); + assertEquals(String.format("Expected 1 task in queue for %s but was %s", repairType, expectedTasks), + 1, expectedTasks); } } From 33d6e4963ce853f57d4cd0f374367acb3e8c2bb4 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 30 Sep 2024 20:54:49 -0700 Subject: [PATCH 031/257] Fix InJvm test AutoRepairSchedulerTest --- .../test/repair/AutoRepairSchedulerTest.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 5a49f669dc34..ca96995805f1 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -21,12 +21,13 @@ import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.UUID; import java.util.concurrent.TimeUnit; 
+import java.util.UUID; import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Uninterruptibles; import org.apache.cassandra.config.DatabaseDescriptor; +import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -80,6 +81,13 @@ public static void init() throws IOException cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + } + + @AfterClass + public static void afterClass() + { + System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); } @Test From 42f32c044252d2ad800976db98e19b9a547e3751 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 1 Oct 2024 09:35:28 -0700 Subject: [PATCH 032/257] Unit test failures AutoRepairTest.java --- .../test/repair/AutoRepairSchedulerTest.java | 1 - .../repair/autorepair/AutoRepairTest.java | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index ca96995805f1..027a3438f0ff 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -81,7 +81,6 @@ public static void init() throws IOException cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); - 
DatabaseDescriptor.setCDCOnRepairEnabled(false); } @AfterClass diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index c72de0ecc7e6..1499d1b0dc56 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -21,11 +21,12 @@ import java.util.HashMap; import java.util.Map; +import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; - import org.junit.Assert; + import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.CQLTester; @@ -53,6 +54,15 @@ public static void setupClass() throws Exception public void setup() { AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + } + + @After + public void after() + { System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); } @@ -60,7 +70,6 @@ public void setup() public void testSetup() { AutoRepair instance = new AutoRepair(); - instance.setup(); assertEquals(RepairType.values().length, instance.repairExecutors.size()); @@ -86,8 +95,7 @@ public void testSafeGuardSetupCall() assertEquals(RepairType.values().length, instance.repairExecutors.size()); for (RepairType repairType : instance.repairExecutors.keySet()) { - int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() - + instance.repairExecutors.get(repairType).getActiveTaskCount(); + int expectedTasks = 
instance.repairExecutors.get(repairType).getCorePoolSize(); assertEquals(String.format("Expected 1 task in queue for %s but was %s", repairType, expectedTasks), 1, expectedTasks); } From 233640f6e16df4fbb6c1f3b69700c700b82ee66d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 3 Oct 2024 14:59:04 -0700 Subject: [PATCH 033/257] 5.0 formatting --- .circleci/config.yml | 36 ++++---- .../org/apache/cassandra/config/Config.java | 5 +- .../config/YamlConfigurationLoader.java | 1 - .../db/streaming/CassandraStreamReceiver.java | 5 +- .../repair/autorepair/AutoRepairKeyspace.java | 84 ------------------- .../repair/autorepair/AutoRepairUtils.java | 25 +++--- .../schema/SystemDistributedKeyspace.java | 33 +++++++- .../apache/cassandra/schema/TableParams.java | 3 +- .../service/ActiveRepairService.java | 27 +++--- .../cassandra/service/StorageService.java | 4 +- .../tcm/transformations/cms/Initialize.java | 1 - .../apache/cassandra/utils/FBUtilities.java | 3 +- .../test/repair/AutoRepairSchedulerTest.java | 9 +- .../autorepair/AutoRepairKeyspaceTest.java | 38 +++------ .../AutoRepairParameterizedTest.java | 12 +-- .../repair/autorepair/AutoRepairTest.java | 7 +- .../autorepair/AutoRepairUtilsTest.java | 53 ++++++------ .../service/ActiveRepairServiceTest.java | 2 - .../service/AutoRepairServiceBasicTest.java | 13 +-- .../service/AutoRepairServiceSetterTest.java | 11 +-- 20 files changed, 151 insertions(+), 221 deletions(-) delete mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java diff --git a/.circleci/config.yml b/.circleci/config.yml index 553e0e32a8a7..9da3ea0f9aea 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -220,7 +220,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -338,7 +338,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + 
parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -625,7 +625,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -3788,7 +3788,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -4100,7 +4100,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - run: name: Log Environment Information @@ -4413,7 +4413,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -4530,7 +4530,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -4781,7 +4781,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -4885,7 +4885,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -5046,7 +5046,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -5278,7 +5278,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -6444,7 +6444,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -6997,7 +6997,7 @@ jobs: resource_class: medium 
working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -7502,7 +7502,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -7843,7 +7843,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -7914,7 +7914,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - run: name: Log Environment Information @@ -8196,7 +8196,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra @@ -8833,7 +8833,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 1 + parallelism: 4 steps: - attach_workspace: at: /home/cassandra diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 32f195c43941..3aaec1b8a8ab 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -369,7 +369,10 @@ public MemtableOptions() // The number of executors to use for building secondary indexes public volatile int concurrent_index_builders = 2; - public volatile double incremental_repair_disk_headroom_reject_ratio = 0.2; // at least 20% of disk must be unused to run incremental repair + + // at least 20% of disk must be unused to run incremental repair + // if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) then set the ratio to 0.0 + public volatile double incremental_repair_disk_headroom_reject_ratio = 0.2; /** * @deprecated retry support removed on CASSANDRA-10992 diff 
--git a/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java b/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java index f37a42e8fa54..0accb0d75f9f 100644 --- a/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java +++ b/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java @@ -461,4 +461,3 @@ public static LoaderOptions getDefaultLoaderOptions() return loaderOptions; } } - diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index c53ad5387e9a..3dd3bc40f179 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -23,10 +23,10 @@ import java.util.List; import java.util.Set; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; import org.apache.cassandra.io.sstable.SSTable; @@ -40,7 +40,6 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; import org.apache.cassandra.db.lifecycle.LifecycleTransaction; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.ThrottledUnfilteredIterator; @@ -50,7 +49,6 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; -import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableMultiWriter; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.accord.AccordService; @@ -210,6 +208,7 @@ private 
boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) * For CDC-enabled tables and write path for CDC is enabled, we want to ensure that the mutations are * run through the CommitLog, so they can be archived by the CDC process on discard. */ + @VisibleForTesting public boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java deleted file mode 100644 index 5afd2fff3275..000000000000 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairKeyspace.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.cassandra.repair.autorepair; - -import java.util.concurrent.TimeUnit; - -import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.Tables; - -public class AutoRepairKeyspace -{ - private AutoRepairKeyspace() - { - } - - /** - * Generation is used as a timestamp for automatic table creation on startup. - * If you make any changes to the tables below, make sure to increment the - * generation and document your change here. - */ - public static final long GENERATION = 0; - - public static final String AUTO_REPAIR_HISTORY = "auto_repair_history"; - - public static final String AUTO_REPAIR_PRIORITY = "auto_repair_priority"; - - public static final TableMetadata AutoRepairHistory = - parse(AUTO_REPAIR_HISTORY, - "Auto repair history for each node", - "CREATE TABLE %s (" - + "host_id uuid," - + "repair_type text," - + "repair_turn text," - + "repair_start_ts timestamp," - + "repair_finish_ts timestamp," - + "delete_hosts set," - + "delete_hosts_update_time timestamp," - + "force_repair boolean," - + "PRIMARY KEY (repair_type, host_id))"); - - public static final TableMetadata AutoRepairPriority = - parse(AUTO_REPAIR_PRIORITY, - "Auto repair priority for each group", - "CREATE TABLE %s (" - + "repair_type text," - + "repair_priority set," - + "PRIMARY KEY (repair_type))"); - - private static TableMetadata parse(String name, String description, String schema) - { - return CreateTableStatement.parse(String.format(schema, name), SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) - .id(TableId.forSystemTable(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, name)) - .comment(description) - .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)) - .build(); - } - - 
public static KeyspaceMetadata metadata() - { - Tables tables = Tables.of(AutoRepairHistory, AutoRepairPriority); - return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(1), tables); - } -} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 007a96e1d6d6..73fc5ab10036 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -54,6 +54,7 @@ import org.apache.cassandra.locator.NetworkTopologyStrategy; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.serializers.SetSerializer; @@ -97,56 +98,56 @@ public class AutoRepairUtils final static String SELECT_REPAIR_HISTORY = String.format( "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE); final static String SELECT_REPAIR_PRIORITY = String.format( "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String DEL_REPAIR_PRIORITY = String.format( "DELETE %s[?] FROM %s.%s WHERE %s = ?", COL_REPAIR_PRIORITY, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String ADD_PRIORITY_HOST = String.format( "UPDATE %s.%s SET %s = %s + ? 
WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String INSERT_NEW_REPAIR_HISTORY = String.format( "INSERT INTO %s.%s (%s, %s, %s, %s, %s, %s) values (?, ? ,?, ?, {}, ?) IF NOT EXISTS", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID, COL_REPAIR_START_TS, COL_REPAIR_FINISH_TS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME); final static String ADD_HOST_ID_TO_DELETE_HOSTS = String.format( "UPDATE %s.%s SET %s = %s + ?, %s = ? WHERE %s = ? AND %s = ? IF EXISTS" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME, COL_REPAIR_TYPE, COL_HOST_ID); final static String DEL_AUTO_REPAIR_HISTORY = String.format( "DELETE FROM %s.%s WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_START_REPAIR_HISTORY = String.format( "UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ?" 
- , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS, COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_FINISH_REPAIR_HISTORY = String.format( "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); final static String CLEAR_DELETE_HOSTS = String.format( "UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, COL_REPAIR_TYPE, COL_HOST_ID); final static String SET_FORCE_REPAIR = String.format( "UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); final static String SELECT_LAST_REPAIR_TIME_FOR_NODE = String.format( "SELECT %s FROM %s.%s WHERE %s = ? 
AND %s = ?", COL_REPAIR_FINISH_TS, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - AutoRepairKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); + SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); static ModificationStatement delStatementRepairHistory; static SelectStatement selectStatementRepairHistory; diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index c2cd0540af68..1759e18e822e 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -57,8 +57,6 @@ import static java.lang.String.format; -import static org.apache.cassandra.repair.autorepair.AutoRepairKeyspace.AutoRepairHistory; -import static org.apache.cassandra.repair.autorepair.AutoRepairKeyspace.AutoRepairPriority; import static org.apache.cassandra.utils.ByteBufferUtil.bytes; public final class SystemDistributedKeyspace @@ -97,7 +95,11 @@ private SystemDistributedKeyspace() public static final String PARTITION_DENYLIST_TABLE = "partition_denylist"; - public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE); + public static final String AUTO_REPAIR_HISTORY = "auto_repair_history"; + + public static final String AUTO_REPAIR_PRIORITY = "auto_repair_priority"; + + public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE, AUTO_REPAIR_HISTORY, AUTO_REPAIR_PRIORITY); public static final String REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + "keyspace_name text," @@ -160,6 +162,29 @@ private SystemDistributedKeyspace() private static final TableMetadata PartitionDenylistTable = parse(PARTITION_DENYLIST_TABLE, "Partition keys which have been denied access", PARTITION_DENYLIST_CQL).build(); + public static final 
String AUTO_REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "host_id uuid," + + "repair_type text," + + "repair_turn text," + + "repair_start_ts timestamp," + + "repair_finish_ts timestamp," + + "delete_hosts set," + + "delete_hosts_update_time timestamp," + + "force_repair boolean," + + "PRIMARY KEY (repair_type, host_id))"; + + private static final TableMetadata AutoRepairHistoryTable = + parse(AUTO_REPAIR_HISTORY, "Auto repair history for each node", AUTO_REPAIR_HISTORY_CQL).build(); + + public static final String AUTO_REPAIR_PRIORITY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "repair_type text," + + "repair_priority set," + + "PRIMARY KEY (repair_type))"; + + private static final TableMetadata AutoRepairPriorityTable = + parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", AUTO_REPAIR_PRIORITY_CQL).build(); + + private static TableMetadata.Builder parse(String table, String description, String cql) { return CreateTableStatement.parse(format(cql, table), SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) @@ -172,7 +197,7 @@ public static KeyspaceMetadata metadata() { return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(Math.max(DEFAULT_RF, DatabaseDescriptor.getDefaultKeyspaceRF())), - Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, AutoRepairHistory, AutoRepairPriority)); + Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, AutoRepairHistoryTable, AutoRepairPriorityTable)); } public static void startParentRepair(TimeUUID parent_id, String keyspaceName, String[] cfnames, RepairOption options) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 3dabf47da264..7fc70e08a4f8 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -104,8 +104,7 @@ public enum Option 
TRANSACTIONAL_MIGRATION_FROM, PENDING_DROP, AUTOMATED_REPAIR_FULL, - AUTOMATED_REPAIR_INCREMENTAL, - ; + AUTOMATED_REPAIR_INCREMENTAL; @Override public String toString() diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 694a5af1a4bb..e39af4339ac5 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -51,13 +51,24 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; +import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.config.Config; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.repair.Scheduler; +import org.apache.cassandra.locator.EndpointsByRange; +import org.apache.cassandra.locator.EndpointsForRange; +import org.apache.cassandra.utils.ExecutorUtils; +import org.apache.cassandra.repair.state.CoordinatorState; +import org.apache.cassandra.repair.state.ParticipateState; +import org.apache.cassandra.repair.state.ValidationState; +import org.apache.cassandra.utils.Simulate; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.TimeUUID; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.concurrent.ExecutorPlus; -import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.IPartitioner; @@ -72,8 +83,6 @@ import org.apache.cassandra.gms.IEndpointStateChangeSubscriber; import org.apache.cassandra.gms.IFailureDetectionEventListener; import org.apache.cassandra.gms.VersionedValue; -import org.apache.cassandra.locator.EndpointsByRange; -import org.apache.cassandra.locator.EndpointsForRange; import 
org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.RepairMetrics; import org.apache.cassandra.net.Message; @@ -84,7 +93,6 @@ import org.apache.cassandra.repair.RepairJobDesc; import org.apache.cassandra.repair.RepairParallelism; import org.apache.cassandra.repair.RepairSession; -import org.apache.cassandra.repair.Scheduler; import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.repair.consistent.CoordinatorSessions; import org.apache.cassandra.repair.consistent.LocalSessions; @@ -99,9 +107,6 @@ import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.repair.messages.SyncResponse; import org.apache.cassandra.repair.messages.ValidationResponse; -import org.apache.cassandra.repair.state.CoordinatorState; -import org.apache.cassandra.repair.state.ParticipateState; -import org.apache.cassandra.repair.state.ValidationState; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; @@ -110,12 +115,8 @@ import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.streaming.PreviewKind; -import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.MerkleTrees; import org.apache.cassandra.utils.Pair; -import org.apache.cassandra.utils.Simulate; -import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.FutureCombiner; diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index bce80b11b6d4..e962e70a6017 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ 
b/src/java/org/apache/cassandra/service/StorageService.java @@ -472,12 +472,12 @@ public enum Mode { STARTING, NORMAL, JOINING, JOINING_FAILED, LEAVING, DECOMMISS private volatile int totalCFs, remainingCFs; - public static final AtomicInteger nextRepairCommand = new AtomicInteger(); - private final List lifecycleSubscribers = new CopyOnWriteArrayList<>(); private final String jmxObjectName; + public static final AtomicInteger nextRepairCommand = new AtomicInteger(); + // true when keeping strict consistency while bootstrapping public static final boolean useStrictConsistency = CONSISTENT_RANGE_MOVEMENT.getBoolean(); private boolean joinRing = JOIN_RING.getBoolean(); diff --git a/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java b/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java index 635a7942260b..11c2ed4d31af 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java +++ b/src/java/org/apache/cassandra/tcm/transformations/cms/Initialize.java @@ -24,7 +24,6 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.MetaStrategy; -import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.SystemDistributedKeyspace; diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 5f5cfb7b6aea..63f353724c9b 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -65,6 +65,8 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; + +import org.apache.cassandra.io.util.File; import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter; import 
org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -88,7 +90,6 @@ import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputBufferFixed; -import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.security.AbstractCryptoProvider; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 027a3438f0ff..1c4ebae5a34e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -26,7 +26,9 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -37,7 +39,6 @@ import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.junit.Assert.assertEquals; @@ -51,7 +52,7 @@ public class AutoRepairSchedulerTest extends TestBaseImpl @BeforeClass public static void init() throws IOException { - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); 
// Define the expected date format pattern String pattern = "EEE MMM dd HH:mm:ss z yyyy"; @@ -93,7 +94,7 @@ public static void afterClass() public void testScheduler() throws ParseException { // ensure there was no history of previous repair runs through the scheduler - Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY), ConsistencyLevel.QUORUM); + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY), ConsistencyLevel.QUORUM); assertEquals(0, rows.length); cluster.forEach(i -> i.runOnInstance(() -> { @@ -116,7 +117,7 @@ public void testScheduler() throws ParseException private void validate(String repairType) throws ParseException { - Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); assertEquals(3, rows.length); for (int node = 0; node < rows.length; node++) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 43b8e4a5f4e8..1337cf3dd2d3 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -18,49 +18,37 @@ package org.apache.cassandra.repair.autorepair; -import java.util.Optional; +import java.util.HashSet; +import java.util.Iterator; import java.util.Set; -import com.google.common.collect.ImmutableSet; - +import org.apache.cassandra.schema.*; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.config.DurationSpec; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.TableMetadata; - -import static org.apache.cassandra.Util.setAutoRepairEnabled; public class AutoRepairKeyspaceTest { - private static final Set tables = ImmutableSet.of( - AutoRepairKeyspace.AUTO_REPAIR_HISTORY, - AutoRepairKeyspace.AUTO_REPAIR_PRIORITY - ); - @BeforeClass public static void setupDatabaseDescriptor() { DatabaseDescriptor.daemonInitialization(); - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); } - @Test - public void testMetadataCanParseSchemas() throws Exception + public void testEnsureAutoRepairTablesArePresent() { - setAutoRepairEnabled(true); - KeyspaceMetadata keyspaceMetadata = AutoRepairKeyspace.metadata(); - - assert keyspaceMetadata.tables.size() == tables.size() : "Expected " + tables.size() + " tables, got " + keyspaceMetadata.tables.size(); - - for (String table : tables) + KeyspaceMetadata keyspaceMetadata = SystemDistributedKeyspace.metadata(); + Iterator iter = keyspaceMetadata.tables.iterator(); + Set actualDistributedTablesIter = new HashSet<>(); + while (iter.hasNext()) { - Optional tableMetadata = keyspaceMetadata.tables.get(table); - - assert tableMetadata.isPresent() : "Table " + table + " not found in metadata"; + actualDistributedTablesIter.add(iter.next().name); } + + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + 
Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 020116a0bc7d..836ce294a882 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -30,12 +30,14 @@ import com.google.common.collect.Sets; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.statements.schema.TableAttributes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Pair; @@ -131,7 +133,7 @@ public void setup() QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); MockitoAnnotations.initMocks(this); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); @@ -140,8 +142,8 @@ public void setup() Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).truncateBlocking(); Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).disableAutoCompaction(); - Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(AutoRepairKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); - 
Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(AutoRepairKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); AutoRepair.instance = new AutoRepair(); @@ -198,7 +200,7 @@ private void executeCQL() QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, s) VALUES ('k', 's')"); QueryProcessor.executeInternal("SELECT s FROM ks.tbl WHERE k='k'"); Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) - .getColumnFamilyStore(AutoRepairKeyspace.AUTO_REPAIR_PRIORITY) + .getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY) .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); } @@ -680,7 +682,7 @@ public void testRepairSuccessAfterRetry() public void testRepairThrowsForIRWithMVReplay() { AutoRepair.instance.setup(); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(true); if (repairType == AutoRepairConfig.RepairType.incremental) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 1499d1b0dc56..00c45693659c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; @@ -54,7 +55,7 @@ public static void setupClass() throws Exception public void setup() { AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new 
DurationSpec.IntSecondsBound("0s"); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); @@ -104,7 +105,7 @@ public void testSafeGuardSetupCall() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithCDCReplay() { - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); DatabaseDescriptor.setCDCEnabled(true); @@ -118,7 +119,7 @@ public void testSetupFailsWhenIREnabledWithCDCReplay() public void testSetupFailsWhenIREnabledWithMVReplay() { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(true); DatabaseDescriptor.setCDCOnRepairEnabled(false); AutoRepair instance = new AutoRepair(); instance.setup(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 539df156ce7a..3629c03041ab 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -24,6 +24,7 @@ import java.util.UUID; import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.junit.Before; import org.junit.BeforeClass; 
import org.junit.Test; @@ -100,10 +101,10 @@ public void setup() MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } @Test @@ -111,14 +112,14 @@ public void testSetForceRepair() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); AutoRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -132,7 +133,7 @@ public void testSetForceRepairNewNode() UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -145,14 +146,14 @@ public void 
testClearDeleteHosts() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, delete_hosts, delete_hosts_update_time) VALUES ('%s', %s, { %s }, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId, hostId)); AutoRepairUtils.clearDeleteHosts(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT delete_hosts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -165,7 +166,7 @@ public void testGetAutoRepairHistoryForLocalGroup() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); List history = AutoRepairUtils.getAutoRepairHistory(repairType); @@ -189,19 +190,19 @@ public void testGetCurrentRepairStatus() UUID regularRepair = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair, repair_start_ts) VALUES ('%s', %s, true, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), forceRepair)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts) VALUES ('%s', %s, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), regularRepair)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), regularRepair)); CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType); @@ -251,11 +252,11 @@ public void testGetHostWithLongestUnrepairTime() UUID otherHostId = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, repair_finish_ts) VALUES ('%s', %s, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), otherHostId)); AutoRepairHistory history = AutoRepairUtils.getHostWithLongestUnrepairTime(repairType); @@ -291,14 +292,14 @@ public void testDeleteAutoRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', 
%s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); AutoRepairUtils.deleteAutoRepairHistory(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(0, result.size()); @@ -309,14 +310,14 @@ public void testUpdateStartAutoRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); AutoRepairUtils.updateStartAutoRepairHistory(repairType, hostId, 123, AutoRepairUtils.RepairTurn.MY_TURN); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT repair_start_ts, repair_turn FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -330,14 +331,14 @@ public void testUpdateFinishAutoRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); 
AutoRepairUtils.updateFinishAutoRepairHistory(repairType, hostId, 123); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT repair_finish_ts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -350,14 +351,14 @@ public void testAddHostIdToDeleteHosts() UUID otherHostId = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), otherHostId)); AutoRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), otherHostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -374,7 +375,7 @@ public void testAddPriorityHost() UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString())); assertNotNull(result); assertEquals(1, result.size()); @@ -389,14 +390,14 @@ public void testRemovePriorityStatus() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', 
{ %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), hostId)); AutoRepairUtils.removePriorityStatus(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString())); assertNotNull(result); assertEquals(1, result.size()); @@ -409,7 +410,7 @@ public void testGetPriorityHosts() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), hostId)); Set hosts = AutoRepairUtils.getPriorityHosts(repairType); diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index f5c3f03087d2..cb90cc725295 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -73,8 +73,6 @@ import org.apache.cassandra.tcm.transformations.Register; import org.apache.cassandra.tcm.transformations.UnsafeJoin; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.TimeUUID; -import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.Refs; import org.mockito.Mock; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 826a7187219c..704d09fc7dbc 
100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.service; -import org.junit.After; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.junit.Before; import org.junit.Test; @@ -35,7 +35,7 @@ public class AutoRepairServiceBasicTest extends CQLTester { @Before public void setUp() { - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsEnabled(false); DatabaseDescriptor.setCDCEnabled(false); @@ -44,11 +44,6 @@ public void setUp() { autoRepairService.config = config; } - @After - public void tearDown() { - System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); - } - @Test public void testSetup() { AutoRepairService.instance.config = null; @@ -96,7 +91,7 @@ public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { @Test(expected = ConfigurationException.class) public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { autoRepairService.config = new AutoRepairConfig(true); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "true"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(true); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); } @@ -105,7 +100,7 @@ public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(true); - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + 
CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 31aae67c557e..271d28b926de 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -19,15 +19,16 @@ package org.apache.cassandra.service; import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairKeyspace; import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -84,7 +85,7 @@ private static Set isLocalHostForceRepair(AutoRepairConfig.R UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) { return 
ImmutableSet.of(InetAddressAndPort.getLocalHost()); @@ -116,15 +117,15 @@ public static void setup() throws Exception { public void prepare() { QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_HISTORY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, AutoRepairKeyspace.AUTO_REPAIR_PRIORITY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } @Test public void testSettersTest() { - System.setProperty("cassandra.streaming.requires_view_build_during_repair", "false"); + CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); setter.accept(repairType, arg); assertEquals(arg, getter.apply(repairType)); From 7c489843f89c73f92aecdf3ea9fcd6474e72c54d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 8 Oct 2024 10:15:37 -0700 Subject: [PATCH 034/257] Fix flaky AutoRepairTest::testSetup and AutoRepairTest::testSafeGuardSetupCall --- .../cassandra/repair/autorepair/AutoRepairTest.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 00c45693659c..e586eec9939b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -41,6 +41,7 @@ import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class AutoRepairTest extends CQLTester { @@ -78,8 +79,8 @@ public void testSetup() { int expectedTasks = 
instance.repairExecutors.get(repairType).getPendingTaskCount() + instance.repairExecutors.get(repairType).getActiveTaskCount(); - assertEquals(String.format("Expected 1 task in queue for %s but was %s", repairType, expectedTasks), - 1, expectedTasks); + assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), + expectedTasks > 0); } } @@ -96,9 +97,10 @@ public void testSafeGuardSetupCall() assertEquals(RepairType.values().length, instance.repairExecutors.size()); for (RepairType repairType : instance.repairExecutors.keySet()) { - int expectedTasks = instance.repairExecutors.get(repairType).getCorePoolSize(); - assertEquals(String.format("Expected 1 task in queue for %s but was %s", repairType, expectedTasks), - 1, expectedTasks); + int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() + + instance.repairExecutors.get(repairType).getActiveTaskCount(); + assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), + expectedTasks > 0); } } From 87190c42c1bc00224381dafbe61b7a1d570435b9 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Tue, 8 Oct 2024 14:28:44 -0700 Subject: [PATCH 035/257] Fix a test failure inside pylib/cqlshlib/test/test_cqlsh_output.py --- pylib/cqlshlib/test/test_cqlsh_output.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py index c32690b42496..f218ea2b313c 100644 --- a/pylib/cqlshlib/test/test_cqlsh_output.py +++ b/pylib/cqlshlib/test/test_cqlsh_output.py @@ -701,7 +701,9 @@ def test_describe_columnfamily_output(self): AND read_repair = 'BLOCKING' AND transactional_mode = 'off' AND transactional_migration_from = 'none' - AND speculative_retry = '99p';""" % quote_name(get_keyspace())) + AND speculative_retry = '99p' + AND automated_repair_full = {'enabled': 'true'} + AND automated_repair_incremental = {'enabled': 'true'};""" % 
quote_name(get_keyspace())) with cqlsh_testrun(tty=True, env=self.default_env) as c: for cmdword in ('describe table', 'desc columnfamily'): From a21c01679e3e35572d40347f133579d4ac6c683a Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 8 Oct 2024 21:10:48 -0700 Subject: [PATCH 036/257] AutoRepair nodetool throws an exception if the framework is disabled --- .../apache/cassandra/repair/autorepair/AutoRepair.java | 1 - .../org/apache/cassandra/service/AutoRepairService.java | 2 +- src/java/org/apache/cassandra/service/StorageService.java | 1 + .../apache/cassandra/tools/nodetool/AutoRepairStatus.java | 8 ++++++++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 8f3307e8ee07..5d5bcf3568c2 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -119,7 +119,6 @@ public void setup() return; } AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); - AutoRepairService.setup(); AutoRepairUtils.setup(); for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 1adad42b263d..d2395f1157b5 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -190,7 +190,7 @@ public Set getOnGoingRepairHostIds(RepairType rType) List histories = AutoRepairUtils.getAutoRepairHistory(rType); if (histories == null) { - return null; + return hostIds; } AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(rType)); for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) diff --git 
a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index e962e70a6017..a635b8f7b7b6 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1133,6 +1133,7 @@ public void doAuthSetup(boolean async) public void doAutoRepairSetup() { + AutoRepairService.setup(); if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { logger.info("Enable auto-repair scheduling"); diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java index 882f2a4d3cda..7b96102c6698 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -44,6 +44,14 @@ public void execute(NodeProbe probe) { checkArgument(repairType != null, "--repair-type is required."); PrintStream out = probe.output().out; + + AutoRepairConfig config = probe.getAutoRepairConfig(); + if (config == null || !config.isAutoRepairSchedulingEnabled()) + { + out.println("Auto-repair is not enabled"); + return; + } + TableBuilder table = new TableBuilder(); table.add("Active Repairs"); Set ongoingRepairHostIds = probe.getOnGoingRepairHostIds(repairType); From fda44af3dc3a97cd26da21c7782e2cf0d5c9aa9b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 9 Oct 2024 09:37:23 -0700 Subject: [PATCH 037/257] Run repair on bootstrapped if configured to do so --- .../cassandra/repair/autorepair/AutoRepairUtils.java | 11 +++++++++++ .../cassandra/tcm/sequences/BootstrapAndJoin.java | 3 +++ .../cassandra/tcm/sequences/BootstrapAndReplace.java | 3 +++ .../cassandra/tcm/sequences/ReplaceSameAddress.java | 4 ++++ .../cassandra/repair/autorepair/AutoRepairTest.java | 2 ++ .../tools/nodetool/AutoRepairStatusTest.java | 11 +++++++++-- 6 files changed, 32 insertions(+), 2 deletions(-) diff 
--git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 73fc5ab10036..6b2d034d916c 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -834,4 +834,15 @@ public static List getAllMVs(RepairType repairType, Keyspace keyspace, T } return allMvs; } + + public static void runRepairOnNewlyBootstrappedNodeIfEnabled() + { + AutoRepairConfig repairConfig = DatabaseDescriptor.getAutoRepairConfig(); + if (repairConfig.isAutoRepairSchedulingEnabled()) + { + for (AutoRepairConfig.RepairType rType : AutoRepairConfig.RepairType.values()) + if (repairConfig.isAutoRepairEnabled(rType) && repairConfig.getForceRepairNewNode(rType)) + AutoRepairUtils.setForceRepairNewNode(rType); + } + } } diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 607ee2f669c3..19d6cb50edae 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -40,6 +40,7 @@ import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordService; @@ -220,6 +221,8 @@ public SequenceState executeNext() "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); return halted(); } + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java index 2b283d6905af..f3a3a8ff4577 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java @@ -44,6 +44,7 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -217,6 +218,8 @@ public SequenceState executeNext() "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); return halted(); } + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index be75538d6888..c24fe17a10fc 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -29,6 +29,8 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -85,6 +87,8 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); throw new IllegalStateException("Could not finish join for during replacement"); } + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } if (finishJoiningRing) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index e586eec9939b..be2b6cc47d75 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -38,6 +38,7 @@ import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.schema.SchemaTestUtil; +import org.apache.cassandra.service.AutoRepairService; import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.junit.Assert.assertEquals; @@ -60,6 +61,7 @@ public void setup() DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + AutoRepairService.setup(); } @After diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index a92f9ec960fe..5bfe9091ff07 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -29,12 +29,14 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.Output; import org.mockito.Mock; import 
org.mockito.MockitoAnnotations; +import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.when; @@ -61,14 +63,19 @@ public static Collection repairTypes() } @Before - public void setUp() + public void setUp() throws Exception { MockitoAnnotations.initMocks(this); cmdOutput = new ByteArrayOutputStream(); PrintStream out = new PrintStream(cmdOutput); when(probe.output()).thenReturn(new Output(out, out)); - when(probe.getAutoRepairConfig()).thenReturn(config); cmd = new AutoRepairStatus(); + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.loadConfig(); + setAutoRepairEnabled(true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.full, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); } @Test(expected = IllegalArgumentException.class) From d26b24ffa1c5ebb1417e7522bbfc83a67bac60a8 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 9 Oct 2024 11:00:06 -0700 Subject: [PATCH 038/257] Fix broken AutoRepairSchedulerTest.java & AutoRepairStatusTest.java --- .../distributed/test/repair/AutoRepairSchedulerTest.java | 2 ++ .../apache/cassandra/tools/nodetool/AutoRepairStatusTest.java | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 1c4ebae5a34e..43c29f14e0de 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -39,6 +39,7 @@ import org.apache.cassandra.distributed.test.TestBaseImpl; import 
org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.junit.Assert.assertEquals; @@ -100,6 +101,7 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { + AutoRepairService.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(false); AutoRepair.instance.setup(); } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index 5bfe9091ff07..16b12ee2350e 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -48,9 +48,6 @@ public class AutoRepairStatusTest private ByteArrayOutputStream cmdOutput; - @Mock - private static AutoRepairConfig config; - private static AutoRepairStatus cmd; @Parameterized.Parameter() From 43c0f992cbad4414de008c753ab097a7e699b976 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 9 Oct 2024 14:47:56 -0700 Subject: [PATCH 039/257] Comments from Chris Lohfink: nix automated_ and a typo --- pylib/cqlshlib/test/test_cqlsh_output.py | 4 ++-- .../cql3/statements/schema/TableAttributes.java | 8 ++++---- .../apache/cassandra/schema/SchemaKeyspace.java | 16 ++++++++-------- .../org/apache/cassandra/schema/TableParams.java | 12 ++++++------ .../tools/nodetool/GetAutoRepairConfig.java | 2 +- .../cql3/statements/DescribeStatementTest.java | 8 ++++---- .../apache/cassandra/db/SchemaCQLHelperTest.java | 4 ++-- .../autorepair/AutoRepairParameterizedTest.java | 6 +++--- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py index f218ea2b313c..a2524d032cb6 100644 --- 
a/pylib/cqlshlib/test/test_cqlsh_output.py +++ b/pylib/cqlshlib/test/test_cqlsh_output.py @@ -702,8 +702,8 @@ def test_describe_columnfamily_output(self): AND transactional_mode = 'off' AND transactional_migration_from = 'none' AND speculative_retry = '99p' - AND automated_repair_full = {'enabled': 'true'} - AND automated_repair_incremental = {'enabled': 'true'};""" % quote_name(get_keyspace())) + AND repair_full = {'enabled': 'true'} + AND repair_incremental = {'enabled': 'true'};""" % quote_name(get_keyspace())) with cqlsh_testrun(tty=True, env=self.default_env) as c: for cmdword in ('describe table', 'desc columnfamily'): diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 62846cffa8ec..c8cfca9731ac 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -198,11 +198,11 @@ private TableParams build(TableParams.Builder builder) if (hasOption(Option.TRANSACTIONAL_MIGRATION_FROM)) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); - if (hasOption(Option.AUTOMATED_REPAIR_FULL)) - builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, getMap(Option.AUTOMATED_REPAIR_FULL))); + if (hasOption(Option.REPAIR_FULL)) + builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, getMap(Option.REPAIR_FULL))); - if (hasOption(Option.AUTOMATED_REPAIR_INCREMENTAL)) - builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.AUTOMATED_REPAIR_INCREMENTAL))); + if (hasOption(Option.REPAIR_INCREMENTAL)) + builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); return builder.build(); } 
diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 372da4a92a9d..ffeb4580eba5 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -133,8 +133,8 @@ private SchemaKeyspace() + "cdc boolean," + "read_repair text," + "fast_path frozen>," - + "automated_repair_full frozen>," - + "automated_repair_incremental frozen>," + + "repair_full frozen>," + + "repair_incremental frozen>," + "PRIMARY KEY ((keyspace_name), table_name))"); private static final TableMetadata Columns = @@ -219,8 +219,8 @@ private SchemaKeyspace() + "additional_write_policy text," + "cdc boolean," + "read_repair text," - + "automated_repair_full frozen>," - + "automated_repair_incremental frozen>," + + "repair_full frozen>," + + "repair_incremental frozen>," + "PRIMARY KEY ((keyspace_name), view_name))"); private static final TableMetadata Indexes = @@ -598,8 +598,8 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) .add("extensions", params.extensions) - .add("automated_repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) - .add("automated_repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + .add("repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); // Only add CDC-enabled flag to schema if it's enabled on the node. 
This is to work around RTE's post-8099 if a 3.8+ @@ -1090,8 +1090,8 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("automated_repair_full"))) - .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("automated_repair_incremental"))); + .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) + .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 7fc70e08a4f8..2c3cfe4daf28 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -103,8 +103,8 @@ public enum Option TRANSACTIONAL_MODE, TRANSACTIONAL_MIGRATION_FROM, PENDING_DROP, - AUTOMATED_REPAIR_FULL, - AUTOMATED_REPAIR_INCREMENTAL; + REPAIR_FULL, + REPAIR_INCREMENTAL; @Override public String toString() @@ -372,8 +372,8 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) - .add(Option.AUTOMATED_REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) - .add(Option.AUTOMATED_REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.incremental)) + .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) + .add(Option.REPAIR_INCREMENTAL.toString(), 
automatedRepair.get(AutoRepairConfig.RepairType.incremental)) .toString(); } @@ -436,9 +436,9 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) .newLine() - .append("AND automated_repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .append("AND repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) .newLine() - .append("AND automated_repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); } public static final class Builder diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index c62063b13b7f..48f8d54de3f6 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -74,7 +74,7 @@ private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, Au sb.append("\n\tpercentage of parallel repairs within group: " + config.getParallelRepairPercentage(repairType)); sb.append("\n\tmv repair enabled: " + config.getMVRepairEnabled(repairType)); sb.append("\n\tinitial scheduler delay: " + config.getInitialSchedulerDelay(repairType)); - sb.append("\n\trepair setssion timeout: " + config.getRepairSessionTimeout(repairType)); + sb.append("\n\trepair session timeout: " + config.getRepairSessionTimeout(repairType)); return sb.toString(); } diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index e16503b0172a..7bcb63c15e1c 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ 
b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1144,8 +1144,8 @@ private static String tableParametersCql() " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = '99p'\n" + - " AND automated_repair_full = {'enabled': 'true'}\n" + - " AND automated_repair_incremental = {'enabled': 'true'};"; + " AND repair_full = {'enabled': 'true'}\n" + + " AND repair_incremental = {'enabled': 'true'};"; } private static String cqlQuoted(Map map) @@ -1173,8 +1173,8 @@ private static String mvParametersCql() " AND min_index_interval = 128\n" + " AND read_repair = 'BLOCKING'\n" + " AND speculative_retry = '99p'\n" + - " AND automated_repair_full = {'enabled': 'true'}\n" + - " AND automated_repair_incremental = {'enabled': 'true'};"; + " AND repair_full = {'enabled': 'true'}\n" + + " AND repair_incremental = {'enabled': 'true'};"; } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index 9ff2ad417545..14688e6f628f 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -350,8 +350,8 @@ public void testCfmOptionsCQL() " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = 'ALWAYS'\n" + - " AND automated_repair_full = {'enabled': 'true'}\n" + - " AND automated_repair_incremental = {'enabled': 'true'};" + " AND repair_full = {'enabled': 'true'}\n" + + " AND repair_incremental = {'enabled': 'true'};" )); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 836ce294a882..40d71aa83834 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -128,7 +128,7 @@ public void setup() SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); - QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH automated_repair_full = {'enabled': 'false'} AND automated_repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); @@ -562,8 +562,8 @@ public void testTokenRangesSplit() @Test public void testTableAttribute() { - assertTrue(TableAttributes.validKeywords().contains("automated_repair_full")); - assertTrue(TableAttributes.validKeywords().contains("automated_repair_incremental")); + assertTrue(TableAttributes.validKeywords().contains("repair_full")); + assertTrue(TableAttributes.validKeywords().contains("repair_incremental")); } @Test From 69267a42a0103f67335b97c60414b8a44a4b4f66 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 9 Oct 2024 16:09:57 -0700 Subject: [PATCH 040/257] Comments from Chris Lohfink: System::currentTimeMillis --> Clock.Global::currentTimeMillis --- .../org/apache/cassandra/repair/autorepair/AutoRepair.java | 3 ++- .../apache/cassandra/repair/autorepair/AutoRepairState.java | 3 ++- 
.../org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 5d5bcf3568c2..51176cb97724 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -39,6 +39,7 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Pair; import org.slf4j.Logger; @@ -67,7 +68,7 @@ public class AutoRepair private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); @VisibleForTesting - protected static Supplier timeFunc = System::currentTimeMillis; + protected static Supplier timeFunc = Clock.Global::currentTimeMillis; public static AutoRepair instance = new AutoRepair(); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index d6f7e066d41e..d3119dea21c8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -36,6 +36,7 @@ import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.progress.ProgressEvent; import org.apache.cassandra.utils.progress.ProgressEventType; @@ -60,7 +61,7 @@ public abstract class AutoRepairState implements ProgressListener protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); private final 
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @VisibleForTesting - protected static Supplier timeFunc = System::currentTimeMillis; + protected static Supplier timeFunc = Clock.Global::currentTimeMillis; @VisibleForTesting protected final RepairType repairType; diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index c24fe17a10fc..1c55d84ca244 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -29,7 +29,6 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; From 6fd372e8dbec36a533802eb1be03aaa4600308a3 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:05:30 -0500 Subject: [PATCH 041/257] Update IAutoRepairTokenRangeSplitter API to be more flexible Updates IAutoRepairTokenRangeSplitter API to take in a set of tables instead of a single table and return "RepairAssignments". This allows the API more control over how the repairs are issued by the AutoRepair framework, by allowing grouping of repairs by specific tables or a single repair at the keyspace level. Also updates IAutoRepairTokenRangeSplitter to be a ParameterizedClass, so custom splitters can have their own configuration. Update AutoRepair to account for these changes. Update DefaultAutoRepairTokenSplitter to account for the change, and to also evaluate on getRepairByKeyspace option. 
This also improves the filtering of tables that should not be repaired where previously getRepairByKeyspace would not factor this in. Move logic to split token ranges evenly into AutoRepairUtils so it can be reutilized by other splitters. --- .../cassandra/config/DatabaseDescriptor.java | 6 + .../cassandra/config/ParameterizedClass.java | 3 +- .../cassandra/metrics/AutoRepairMetrics.java | 11 +- .../repair/autorepair/AutoRepair.java | 265 +++++++++--------- .../repair/autorepair/AutoRepairConfig.java | 10 +- .../repair/autorepair/AutoRepairState.java | 14 + .../repair/autorepair/AutoRepairUtils.java | 29 +- .../DefaultAutoRepairTokenSplitter.java | 66 ++--- .../IAutoRepairTokenRangeSplitter.java | 76 ++++- .../schema/SystemDistributedKeyspace.java | 2 +- .../apache/cassandra/utils/FBUtilities.java | 29 +- .../org/apache/cassandra/cql3/CQLTester.java | 4 +- .../autorepair/AutoRepairConfigTest.java | 8 +- ...DefaultTokenSplitterParameterizedTest.java | 172 ++++++++++++ .../AutoRepairParameterizedTest.java | 47 ++-- 15 files changed, 522 insertions(+), 220 deletions(-) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index abc3ca58787a..5a8077e916d2 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5915,4 +5915,10 @@ public static void setIncrementalRepairDiskHeadroomRejectRatio(double value) { conf.incremental_repair_disk_headroom_reject_ratio = value; } + + @VisibleForTesting + public static void setPartitioner(String name) + { + partitioner = FBUtilities.newPartitioner(name); + } } diff --git a/src/java/org/apache/cassandra/config/ParameterizedClass.java b/src/java/org/apache/cassandra/config/ParameterizedClass.java index d772629f194c..a38dbc1d2f0b 100644 --- 
a/src/java/org/apache/cassandra/config/ParameterizedClass.java +++ b/src/java/org/apache/cassandra/config/ParameterizedClass.java @@ -20,6 +20,7 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.Collections; +import java.io.Serializable; import java.util.List; import java.util.Map; import java.util.function.Predicate; @@ -33,7 +34,7 @@ import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; @Shared(scope = SIMULATION) -public class ParameterizedClass +public class ParameterizedClass implements Serializable { public static final String CLASS_NAME = "class_name"; public static final String PARAMETERS = "parameters"; diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index dedd8f170170..bdb6deacf6ad 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -39,7 +39,7 @@ public class AutoRepairMetrics public Gauge succeededTokenRangesCount; public Gauge failedTokenRangesCount; public Gauge skippedTokenRangesCount; - + public Gauge skippedTablesCount; public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; @@ -82,6 +82,15 @@ public Integer getValue() } }); + skippedTablesCount = Metrics.register(factory.createMetricName("SkippedTablesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getSkippedTablesCount(); + } + }); + + longestUnrepairedSec = Metrics.register(factory.createMetricName("LongestUnrepairedSec"), new Gauge() { public Integer getValue() diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 51176cb97724..1abb71d69cac 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ 
b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -32,7 +32,7 @@ import java.util.function.Supplier; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.util.concurrent.Uninterruptibles; import org.apache.cassandra.repair.RepairCoordinator; @@ -40,7 +40,7 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -128,10 +128,10 @@ public void setup() AutoRepairService.instance.checkCanRun(repairType); repairExecutors.get(repairType).scheduleWithFixedDelay( - () -> repair(repairType), - config.getInitialSchedulerDelay(repairType).toSeconds(), - config.getRepairCheckInterval().toSeconds(), - TimeUnit.SECONDS); + () -> repair(repairType), + config.getInitialSchedulerDelay(repairType).toSeconds(), + config.getRepairCheckInterval().toSeconds(), + TimeUnit.SECONDS); } isSetupDone = true; } @@ -197,9 +197,7 @@ public void repair(AutoRepairConfig.RepairType repairType) repairState.setTotalTablesConsideredForRepair(0); repairState.setTotalMVTablesConsideredForRepair(0); - int failedTokenRanges = 0; - int succeededTokenRanges = 0; - int skippedTokenRanges = 0; + CollectectedRepairStats collectectedRepairStats = new CollectectedRepairStats(); List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); @@ -215,149 +213,116 @@ public void repair(AutoRepairConfig.RepairType repairType) } repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); - List tablesToBeRepaired = retrieveTablesToBeRepaired(keyspace, repairType, repairState); - shuffleFunc.accept(tablesToBeRepaired); - for (String tableName : tablesToBeRepaired) 
+ List tablesToBeRepairedList = retrieveTablesToBeRepaired(keyspace, config, repairType, repairState, collectectedRepairStats); + shuffleFunc.accept(tablesToBeRepairedList); + String keyspaceName = keyspace.getName(); + List repairAssignments = tokenRangeSplitters.get(repairType).getRepairAssignments(repairType, primaryRangeOnly, keyspaceName, tablesToBeRepairedList); + + int totalRepairAssignments = repairAssignments.size(); + long keyspaceStartTime = timeFunc.get(); + RepairAssignment previousAssignment = null; + long tableStartTime = timeFunc.get(); + int totalProcessedAssignments = 0; + Set> ranges = new HashSet<>(); + for (RepairAssignment curRepairAssignment : repairAssignments) { - String keyspaceName = keyspace.getName(); try { - List> subRangesToBeRepaired = tokenRangeSplitters.get(repairType).getRange(repairType, primaryRangeOnly, keyspaceName, tableName); - int totalSubRanges = subRangesToBeRepaired.size(); - - ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); - if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) + totalProcessedAssignments++; + boolean repairOneTableAtATime = !config.getRepairByKeyspace(repairType); + if (previousAssignment != null && repairOneTableAtATime && !previousAssignment.tableNames.equals(curRepairAssignment.tableNames)) { - logger.info("Repair is disabled for keyspace {} for tables: {}", keyspaceName, tableName); - repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); - continue; + // In the repair assignment, all the tables are appended sequnetially. + // Check if we have a different table, and if so, we should reset the table start time. 
+ tableStartTime = timeFunc.get(); } - // this is done to make autorepair safe as running repair on table with more sstables - // may have its own challenges - int size = columnFamilyStore.getLiveSSTables().size(); - if (size > config.getRepairSSTableCountHigherThreshold(repairType)) + previousAssignment = curRepairAssignment; + if (!config.isAutoRepairEnabled(repairType)) { - logger.info("Too many SSTables for repair, not doing repair on table {}.{} " + - "totalSSTables {}", keyspaceName, tableName, columnFamilyStore.getLiveSSTables().size()); - skippedTokenRanges += totalSubRanges; - continue; + logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); + repairState.setRepairInProgress(false); + return; } - - if (config.getRepairByKeyspace(repairType)) + if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, tablesToBeRepairedList.size())) { - logger.info("Repair keyspace {} for tables: {}", keyspaceName, tablesToBeRepaired); + collectectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; + logger.info("Keyspace took too much time to repair hence skipping it {}", + keyspaceName); + break; } - else + if (repairOneTableAtATime && AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) { - logger.info("Repair table {}.{}", keyspaceName, tableName); + collectectedRepairStats.skippedTokenRanges += 1; + logger.info("Table took too much time to repair hence skipping it table name {}.{}, token range {}", + keyspaceName, curRepairAssignment.tableNames, curRepairAssignment.tokenRange); + continue; } - long tableStartTime = timeFunc.get(); - Set> ranges = new HashSet<>(); - int totalProcessedSubRanges = 0; - for (Pair token : subRangesToBeRepaired) - { - if (!config.isAutoRepairEnabled(repairType)) - { - logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); - repairState.setRepairInProgress(false); - return; - } - if 
(config.getRepairByKeyspace(repairType)) + Range tokenRange = curRepairAssignment.getTokenRange(); + logger.debug("Current Token Left side {}, right side {}", + tokenRange.left.toString(), + tokenRange.right.toString()); + + ranges.add(curRepairAssignment.getTokenRange()); + if ((totalProcessedAssignments % config.getRepairThreads(repairType) == 0) || + (totalProcessedAssignments == totalRepairAssignments)) + { + int retryCount = 0; + Future f = null; + while (retryCount <= config.getRepairMaxRetries()) { - if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, tableStartTime, tablesToBeRepaired.size())) + RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); + repairState.resetWaitCondition(); + f = repairRunnableExecutors.get(repairType).submit(task); + try { - skippedTokenRanges += totalSubRanges - totalProcessedSubRanges; - logger.info("Keyspace took too much time to repair hence skipping it {}", - keyspaceName); - break; + repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); } - } - else - { - if (AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) + catch (InterruptedException e) { - skippedTokenRanges += totalSubRanges - totalProcessedSubRanges; - logger.info("Table took too much time to repair hence skipping it {}.{}", - keyspaceName, tableName); - break; + logger.error("Exception in cond await:", e); } - } - Token childStartToken = token.left; - Token childEndToken = token.right; - logger.debug("Current Token Left side {}, right side {}", childStartToken - .toString(), childEndToken.toString()); - - ranges.add(new Range<>(childStartToken, childEndToken)); - totalProcessedSubRanges++; - if ((totalProcessedSubRanges % config.getRepairThreads(repairType) == 0) || - (totalProcessedSubRanges == totalSubRanges)) - { - int retryCount = 0; - Future f = null; - while (retryCount <= 
config.getRepairMaxRetries()) + if (repairState.isSuccess()) { - RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : ImmutableList.of(tableName), - ranges, primaryRangeOnly); - repairState.resetWaitCondition(); - f = repairRunnableExecutors.get(repairType).submit(task); - try - { - repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); - } - catch (InterruptedException e) - { - logger.error("Exception in cond await:", e); - } - if (repairState.isSuccess()) - { - break; - } - else if (retryCount < config.getRepairMaxRetries()) - { - boolean cancellationStatus = f.cancel(true); - logger.warn("Repair failed for range {}-{} for {}.{} with cancellationStatus: {} retrying after {} seconds...", - childStartToken, childEndToken, - keyspaceName, config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, - cancellationStatus, config.getRepairRetryBackoff().toSeconds()); - sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); - } - retryCount++; + break; } - //check repair status - if (repairState.isSuccess()) + else if (retryCount < config.getRepairMaxRetries()) { - logger.info("Repair completed for range {}-{} for {}.{}, total subranges: {}," + - "processed subranges: {}", childStartToken, childEndToken, - keyspaceName, config.getRepairByKeyspace(repairType) ? 
tablesToBeRepaired : tableName, totalSubRanges, totalProcessedSubRanges); - succeededTokenRanges += ranges.size(); + boolean cancellationStatus = f.cancel(true); + logger.warn("Repair failed for range {}-{} for {} tables {} with cancellationStatus: {} retrying after {} seconds...", + tokenRange.left, tokenRange.right, + keyspaceName, curRepairAssignment.getTableNames(), + cancellationStatus, config.getRepairRetryBackoff().toSeconds()); + sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); } - else + retryCount++; + } + //check repair status + if (repairState.isSuccess()) + { + logger.info("Repair completed for range {}-{} for {} tables {}, total assignments: {}," + + "processed assignments: {}", tokenRange.left, tokenRange.right, + keyspaceName, curRepairAssignment.getTableNames(), totalRepairAssignments, totalProcessedAssignments); + collectectedRepairStats.succeededTokenRanges += ranges.size(); + } + else + { + boolean cancellationStatus = true; + if (f != null) { - boolean cancellationStatus = true; - if (f != null) - { - cancellationStatus = f.cancel(true); - } - //in the future we can add retry, etc. - logger.error("Repair failed for range {}-{} for {}.{} after {} retries, total subranges: {}," + - "processed subranges: {}, cancellationStatus: {}", childStartToken.toString(), childEndToken.toString(), keyspaceName, - config.getRepairByKeyspace(repairType) ? tablesToBeRepaired : tableName, retryCount, totalSubRanges, totalProcessedSubRanges, cancellationStatus); - failedTokenRanges += ranges.size(); + cancellationStatus = f.cancel(true); } - ranges.clear(); + //in the future we can add retry, etc. 
+ logger.error("Repair failed for range {}-{} for {} tables {} after {} retries, total assignments: {}," + + "processed assignments: {}, cancellationStatus: {}", tokenRange.left, tokenRange.right, keyspaceName, + curRepairAssignment.getTableNames(), retryCount, totalRepairAssignments, totalProcessedAssignments, cancellationStatus); + collectectedRepairStats.failedTokenRanges += ranges.size(); } + ranges.clear(); } - if (config.getRepairByKeyspace(repairType)) - { - logger.info("Repair completed for keyspace {}, tables: {}", keyspaceName, tablesToBeRepaired); - break; - } - else - { - logger.info("Repair completed for {}.{}", keyspaceName, tableName); - } + logger.info("Repair completed for {} tables {}, range {}", keyspaceName, curRepairAssignment.getTableNames(), curRepairAssignment.getTokenRange()); } catch (Exception e) { @@ -365,7 +330,7 @@ else if (retryCount < config.getRepairMaxRetries()) } } } - cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, failedTokenRanges, succeededTokenRanges, skippedTokenRanges); + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, collectectedRepairStats); } else { @@ -400,7 +365,7 @@ private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoR return false; } - private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig.RepairType repairType, AutoRepairState repairState) + private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig config, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, CollectectedRepairStats collectectedRepairStats) { Tables tables = keyspace.getMetadata().tables; List tablesToBeRepaired = new ArrayList<>(); @@ -410,6 +375,27 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon repairState.setTotalTablesConsideredForRepair(repairState.getTotalTablesConsideredForRepair() + 1); TableMetadata tableMetadata = iter.next(); String tableName = tableMetadata.name; + + ColumnFamilyStore 
columnFamilyStore = keyspace.getColumnFamilyStore(tableName); + if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) + { + logger.info("Repair is disabled for keyspace {} for tables: {}", keyspace.getName(), tableName); + repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); + collectectedRepairStats.skippedTables++; + continue; + } + + // this is done to make autorepair safe as running repair on table with more sstables + // may have its own challenges + int totalSSTables = columnFamilyStore.getLiveSSTables().size(); + if (totalSSTables > config.getRepairSSTableCountHigherThreshold(repairType)) + { + logger.info("Too many SSTables for repair for table {}.{}" + + "totalSSTables {}", keyspace.getName(), tableName, totalSSTables); + collectectedRepairStats.skippedTables++; + continue; + } + tablesToBeRepaired.add(tableName); // See if we should repair MVs as well that are associated with this given table @@ -424,7 +410,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon } private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, - long startTime, int failedTokenRanges, int succeededTokenRanges, int skippedTokenRanges) throws InterruptedException + long startTime, CollectectedRepairStats collectectedRepairStats) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) @@ -433,16 +419,17 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType AutoRepairUtils.removePriorityStatus(repairType, myId); } - repairState.setFailedTokenRangesCount(failedTokenRanges); - repairState.setSucceededTokenRangesCount(succeededTokenRanges); - repairState.setSkippedTokenRangesCount(skippedTokenRanges); + repairState.setFailedTokenRangesCount(collectectedRepairStats.failedTokenRanges); + 
repairState.setSucceededTokenRangesCount(collectectedRepairStats.succeededTokenRanges); + repairState.setSkippedTokenRangesCount(collectectedRepairStats.skippedTokenRanges); + repairState.setSkippedTablesCount(collectectedRepairStats.skippedTables); repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + "repairTokenRangesSuccessCount {}, repairTokenRangesFailureCount {}, " + - "repairTokenRangesSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), + "repairTokenRangesSkipCount {}, repairTablesSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), repairState.getSucceededTokenRangesCount(), repairState.getFailedTokenRangesCount(), - repairState.getSkippedTokenRangesCount()); + repairState.getSkippedTokenRangesCount(), repairState.getSkippedTablesCount()); if (repairState.getLastRepairTime() != 0) { repairState.setClusterRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - @@ -466,4 +453,12 @@ public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) { return repairStates.get(repairType); } + + static class CollectectedRepairStats + { + int failedTokenRanges = 0; + int succeededTokenRanges = 0; + int skippedTokenRanges = 0; + int skippedTables = 0; + } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index ac9394d9cfe1..30433a1ad7ff 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collections; import java.util.EnumMap; import java.util.HashSet; import java.util.Map; @@ 
-31,6 +32,7 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.ParameterizedClass; public class AutoRepairConfig implements Serializable { @@ -274,7 +276,7 @@ public boolean getForceRepairNewNode(RepairType repairType) return applyOverrides(repairType, opt -> opt.force_repair_new_node); } - public String getTokenRangeSplitter(RepairType repairType) + public ParameterizedClass getTokenRangeSplitter(RepairType repairType) { return applyOverrides(repairType, opt -> opt.token_range_splitter); } @@ -331,7 +333,7 @@ protected static Options getDefaultOptions() opts.force_repair_new_node = false; opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = false; - opts.token_range_splitter = DefaultAutoRepairTokenSplitter.class.getName(); + opts.token_range_splitter = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours @@ -395,9 +397,9 @@ protected static Options getDefaultOptions() // the default is 'true'. // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. public volatile Boolean mv_repair_enabled; - // the default is DefaultAutoRepairTokenSplitter.class.getName(). The class should implement IAutoRepairTokenRangeSplitter. + // the default is DefaultAutoRepairTokenSplitter. The class should implement IAutoRepairTokenRangeSplitter. 
// The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' - public volatile String token_range_splitter; + public volatile ParameterizedClass token_range_splitter; // the minimum delay after a node starts before the scheduler starts running repair public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; // repair session timeout - this is applicable for each repair session diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index d3119dea21c8..b2a69c31a44c 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -88,6 +88,10 @@ public abstract class AutoRepairState implements ProgressListener protected int succeededTokenRangesCount = 0; @VisibleForTesting protected int skippedTokenRangesCount = 0; + + @VisibleForTesting + protected int skippedTablesCount = 0; + @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; @VisibleForTesting @@ -260,6 +264,16 @@ public int getSkippedTokenRangesCount() return skippedTokenRangesCount; } + public void setSkippedTablesCount(int count) + { + skippedTablesCount = count; + } + + public int getSkippedTablesCount() + { + return skippedTablesCount; + } + public boolean isSuccess() { return success; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 6b2d034d916c..1a6fd7c75f37 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -33,6 +33,8 @@ import com.google.common.base.MoreObjects; import com.google.common.collect.Lists; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import 
org.apache.cassandra.locator.LocalStrategy; import org.slf4j.Logger; @@ -778,7 +780,6 @@ public static Set getPriorityHosts(RepairType repairType) for (UUID hostId : getPriorityHostIds(repairType)) { hosts.add(ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(hostId)).broadcastAddress); - //hosts.add(StorageService.instance.getTokenMetadata().getEndpointForHostId(hostId)); } return hosts; } @@ -845,4 +846,30 @@ public static void runRepairOnNewlyBootstrappedNodeIfEnabled() AutoRepairUtils.setForceRepairNewNode(rType); } } + + public static List> splitEvenly(Range tokenRange, int numberOfSplits) + { + List> splitRanges = new ArrayList<>(); + long left = (Long) tokenRange.left.getTokenValue(); + long right = (Long) tokenRange.right.getTokenValue(); + long repairTokenWidth = (right - left) / numberOfSplits; + for (int i = 0; i < numberOfSplits; i++) + { + long curLeft = left + (i * repairTokenWidth); + long curRight = curLeft + repairTokenWidth; + + if ((i + 1) == numberOfSplits) + { + curRight = right; + } + + Token childStartToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curLeft); + Token childEndToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curRight); + logger.debug("Current Token Left side {}, right side {}", childStartToken + .toString(), childEndToken.toString()); + Range splitRange = new Range<>(childStartToken, childEndToken); + splitRanges.add(splitRange); + } + return splitRanges; + } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index 18231fff2af0..9a884f61c581 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -20,69 +20,61 @@ import java.util.ArrayList; import java.util.Collection; +import 
java.util.Collections; import java.util.List; import org.apache.cassandra.service.AutoRepairService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.Pair; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.splitEvenly; public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter { - private static final Logger logger = LoggerFactory.getLogger(DefaultAutoRepairTokenSplitter.class); - - @Override - public List> getRange(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, String tableName) + public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) { - List> range = new ArrayList<>(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + List repairAssignments = new ArrayList<>(); Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); if (!primaryRangeOnly) { // if we need to repair non-primary token ranges, then change the tokens accrodingly - tokens = StorageService.instance.getLocalReplicas(keyspaceName).ranges(); + tokens = StorageService.instance.getLocalReplicas(keyspaceName).onlyFull().ranges(); } - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); int numberOfSubranges = config.getRepairSubRangeNum(repairType); + + boolean byKeyspace = config.getRepairByKeyspace(repairType); + + // collect all token ranges. 
+ List> allRanges = new ArrayList<>(); for (Range token : tokens) { - Murmur3Partitioner.LongToken l = (Murmur3Partitioner.LongToken) (token.left); - Murmur3Partitioner.LongToken r = (Murmur3Partitioner.LongToken) (token.right); - // Token.TokenFactory factory = ClusterMetadata.current().partitioner.getTokenFactory(); - - Token parentStartToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + l.getTokenValue()); - Token parentEndToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + r.getTokenValue()); - logger.debug("Parent Token Left side {}, right side {}", parentStartToken.toString(), - parentEndToken.toString()); + allRanges.addAll(splitEvenly(token, numberOfSubranges)); + } - long left = (Long) l.getTokenValue(); - long right = (Long) r.getTokenValue(); - long repairTokenWidth = (right - left) / numberOfSubranges; - for (int i = 0; i < numberOfSubranges; i++) + if (byKeyspace) + { + for (Range splitRange : allRanges) { - long curLeft = left + (i * repairTokenWidth); - long curRight = curLeft + repairTokenWidth; - - if ((i + 1) == numberOfSubranges) + // add repair assignment for each range entire keyspace's tables + repairAssignments.add(new RepairAssignment(splitRange, keyspaceName, tableNames)); + } + } + else + { + // add repair assignment per table + for (String tableName : tableNames) + { + for (Range splitRange : allRanges) { - curRight = right; + repairAssignments.add(new RepairAssignment(splitRange, keyspaceName, Collections.singletonList(tableName))); } - - Token childStartToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curLeft); - Token childEndToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curRight); - logger.debug("Current Token Left side {}, right side {}", childStartToken - .toString(), childEndToken.toString()); - range.add(Pair.create(childStartToken, childEndToken)); } } - return range; + return repairAssignments; } } \ No 
newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index 2b69898a360d..dd02c4d4f7b6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -19,13 +19,81 @@ import java.util.List; +import java.util.Objects; +import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.utils.Pair; public interface IAutoRepairTokenRangeSplitter { - // split the token range you wish to repair into multiple subranges - // the autorepair framework will repair the list of returned subrange in a sequence - List> getRange(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, String tableName); + + /** + * Split the token range you wish to repair into multiple assignments. + * The autorepair framework will repair the list of returned subrange in a sequence. + * @param repairType The type of repair being executed + * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. + * @param keyspaceName The keyspace being repaired + * @param tableNames The tables to repair + * @return repair assignments broken up by range, keyspace and tables. + */ + List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); + + /** + * Defines a repair assignment to be issued by the autorepair framework. 
+ */ + class RepairAssignment + { + final Range tokenRange; + + final String keyspaceName; + + final List tableNames; + + public RepairAssignment(Range tokenRange, String keyspaceName, List tableNames) + { + this.tokenRange = tokenRange; + this.keyspaceName = keyspaceName; + this.tableNames = tableNames; + } + + public Range getTokenRange() + { + return tokenRange; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getTableNames() + { + return tableNames; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RepairAssignment that = (RepairAssignment) o; + return Objects.equals(tokenRange, that.tokenRange) && Objects.equals(keyspaceName, that.keyspaceName) && Objects.equals(tableNames, that.tableNames); + } + + @Override + public int hashCode() + { + return Objects.hash(tokenRange, keyspaceName, tableNames); + } + + @Override + public String toString() + { + return "RepairAssignment{" + + "tokenRange=" + tokenRange + + ", keyspaceName='" + keyspaceName + '\'' + + ", tableNames=" + tableNames + + '}'; + } + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index 1759e18e822e..0eaa16aa540c 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -416,4 +416,4 @@ private enum BuildStatus { UNKNOWN, STARTED, SUCCESS } -} +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 63f353724c9b..2be783fe4a9d 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -66,6 +66,7 @@ import com.google.common.base.Suppliers; import 
com.google.common.collect.ImmutableList; +import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.io.util.File; import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter; import org.apache.commons.lang3.StringUtils; @@ -731,11 +732,31 @@ public static AbstractCryptoProvider newCryptoProvider(String className, Map tokenRangeSplitterClass = Class.forName(className); + try + { + Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); + // first attempt to initialize with Map arguments. + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(Map.class).newInstance(parameters); + } + catch (NoSuchMethodException nsme) + { + // fall back on no argument constructor. + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); + } + } + catch (Exception ex) + { + throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter for " + className, ex); + } } /** diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 3492eeba23fa..59899b534a6a 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -3489,7 +3489,7 @@ protected static long seed() return SEED; } - protected static void setupSeed() + public static void setupSeed() { if (RANDOM != null) return; SEED = TEST_RANDOM_SEED.getLong(new DefaultRandom().nextLong()); @@ -3502,7 +3502,7 @@ public void resetSeed() RANDOM.setSeed(SEED); } - protected static void updateConfigs() + public static void updateConfigs() { if (CONFIG_GEN == null) CONFIG_GEN = new ConfigGenBuilder().build(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index c42959eec32b..4f1dd029ef5e 100644 --- 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -20,6 +20,7 @@ import java.util.EnumMap; import java.util.Objects; +import java.util.Collections; import java.util.Set; import com.google.common.collect.ImmutableSet; @@ -31,6 +32,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.Options; @@ -384,14 +386,16 @@ public void testGetDefaultOptionsTokenRangeSplitter() { Options defaultOptions = Options.getDefaultOptions(); - assertEquals(DefaultAutoRepairTokenSplitter.class.getName(),defaultOptions.token_range_splitter); + ParameterizedClass expectedDefault = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); + + assertEquals(expectedDefault, defaultOptions.token_range_splitter); assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); } @Test(expected = ConfigurationException.class) public void testInvalidTokenRangeSplitter() { - assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter("invalid-class").getClass().getName()); + FBUtilities.newAutoRepairTokenRangeSplitter(new ParameterizedClass("invalid-class", Collections.emptyMap())); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java new file mode 100644 index 000000000000..804d0a712b48 --- /dev/null +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import 
static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public class AutoRepairDefaultTokenSplitterParameterizedTest +{ + private static final String KEYSPACE = "ks"; + private static final String TABLE1 = "tbl1"; + private static final String TABLE2 = "tbl2"; + private static final String TABLE3 = "tbl3"; + + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + @BeforeClass + public static void setupClass() throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Token t1 = new Murmur3Partitioner.LongToken(0); + Token t2 = new Murmur3Partitioner.LongToken(256); + Token t3 = new Murmur3Partitioner.LongToken(1024); + Set tokens = new HashSet<>(); + tokens.add(t1); + tokens.add(t2); + tokens.add(t3); + + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + } + + private static void appendExpectedTokens(long left, long right, int numberOfSplits, List> expectedToken) + { + long repairTokenWidth = (right - left) / numberOfSplits; + for (int i = 0; i < numberOfSplits; i++) + { + long curLeft = left + (i * repairTokenWidth); + long curRight = curLeft + repairTokenWidth; + if ((i + 1) == numberOfSplits) + { + curRight = right; + } + Token childStartToken = 
ClusterMetadata.current() + .partitioner.getTokenFactory().fromString("" + curLeft); + Token childEndToken = ClusterMetadata.current() + .partitioner.getTokenFactory().fromString("" + curRight); + expectedToken.add(new Range<>(childStartToken, childEndToken)); + } + } + + @Test + public void testTokenRangesSplitByTable() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); + int totalTokenRanges = 3; + Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + assertEquals(totalTokenRanges, tokens.size()); + int numberOfSplits = 4; + List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); + List> expectedToken = new ArrayList<>(); + for (int i = 0; i < tables.size(); i++) + { + appendExpectedTokens(1024, 0, numberOfSplits, expectedToken); + appendExpectedTokens(0, 256, numberOfSplits, expectedToken); + appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); + } + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairSubRangeNum(repairType, numberOfSplits); + List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); + assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); + assertEquals(expectedToken.size(), assignments.size()); + + int expectedTableIndex = -1; + for (int i = 0; i < totalTokenRanges * numberOfSplits * tables.size(); i++) + { + if (i % (totalTokenRanges * numberOfSplits) == 0) + { + expectedTableIndex++; + } + assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); + assertEquals(Arrays.asList(tables.get(expectedTableIndex)), assignments.get(i).getTableNames()); + } + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); + int totalTokenRanges = 3; + Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + 
assertEquals(totalTokenRanges, tokens.size()); + int numberOfSplits = 4; + List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); + List> expectedToken = new ArrayList<>(); + appendExpectedTokens(1024, 0, numberOfSplits, expectedToken); + appendExpectedTokens(0, 256, numberOfSplits, expectedToken); + appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairSubRangeNum(repairType, numberOfSplits); + List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); + assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); + assertEquals(expectedToken.size(), assignments.size()); + + for (int i = 0; i < totalTokenRanges * numberOfSplits; i++) + { + assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); + assertEquals(tables, assignments.get(i).getTableNames()); + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 40d71aa83834..103b3457f919 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -37,9 +38,9 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.utils.Pair; import 
org.junit.After; import org.junit.Assert; @@ -416,21 +417,26 @@ public void testSkipRepairSSTableCountHigherThreshold() assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); state.setLastRepairTime(0); AutoRepair.instance.repair(repairType); - assertEquals(1, state.getTotalMVTablesConsideredForRepair()); - assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - // skipping one time for the base table and another time for MV table - assertEquals(2, state.getSkippedTokenRangesCount()); - assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, state.getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + // skipping both the tables - one table is due to its repair has been disabled, and another one due to high sstable count + assertEquals(0, state.getSkippedTokenRangesCount()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(2, state.getSkippedTablesCount()); + assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); // set it to higher value, and this time, the tables should not be skipped - config.setRepairSSTableCountHigherThreshold(repairType, 11); config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); state.setLastRepairTime(0); + state.setSkippedTablesCount(0); + state.setTotalMVTablesConsideredForRepair(0); AutoRepair.instance.repair(repairType); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); - assertEquals(0, state.getSkippedTokenRangesCount()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, 
state.getSkippedTokenRangesCount()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(1, state.getSkippedTablesCount()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); } @Test @@ -536,27 +542,12 @@ public void testTokenRangesNoSplit() { Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(1, tokens.size()); - List> expectedToken = new ArrayList<>(); - expectedToken.addAll(tokens); + List> expectedToken = new ArrayList<>(tokens); - List> ranges = new DefaultAutoRepairTokenSplitter().getRange(repairType, true, KEYSPACE, TABLE); - assertEquals(1, ranges.size()); - assertEquals(expectedToken.get(0).left, ranges.get(0).left); - assertEquals(expectedToken.get(0).right, ranges.get(0).right); - } - - @Test - public void testTokenRangesSplit() - { - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(1, tokens.size()); - List> expectedToken = new ArrayList<>(); - expectedToken.addAll(tokens); - - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setRepairSubRangeNum(repairType, 4); - List> ranges = new DefaultAutoRepairTokenSplitter().getRange(repairType, true, KEYSPACE, TABLE); - assertEquals(4, ranges.size()); + List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, Collections.singletonList(TABLE)); + assertEquals(1, assignments.size()); + assertEquals(expectedToken.get(0).left, assignments.get(0).getTokenRange().left); + assertEquals(expectedToken.get(0).right, assignments.get(0).getTokenRange().right); } @Test From 2ccdc37e83f47245ed3ff7924845017db52c7329 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sat, 12 Oct 2024 09:45:10 -0700 Subject: [PATCH 042/257] Fix a corner-case bug NPE org.apache.cassandra.repair.autorepair.AutoRepairUtils.myTurnToRunRepair --- 
.../repair/autorepair/AutoRepairUtils.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 1a6fd7c75f37..0a328de44f98 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -579,12 +579,8 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - // get the longest unrepaired node from the nodes which are not running repair - AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); - //check who is next, which is helpful for debugging - logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); UUID priorityHostId = null; - if (currentRepairStatus.priority != null) + if (currentRepairStatus != null && currentRepairStatus.priority != null) { for (UUID priorityID : currentRepairStatus.priority) { @@ -615,8 +611,14 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) return MY_TURN_DUE_TO_PRIORITY; } - if (defaultNodeToBeRepaired.hostId.equals(myId)) + // get the longest unrepaired node from the nodes which are not running repair + AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + //check who is next, which is helpful for debugging + logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); + if (defaultNodeToBeRepaired != null && defaultNodeToBeRepaired.hostId.equals(myId)) + { return MY_TURN; + } } else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) { From d35fb4b6731216ad51fff9a731f0ebb65d77dac9 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 14 Oct 2024 13:50:25 -0700 Subject: 
[PATCH 043/257] Update documentation for the newly added metric --- doc/modules/cassandra/pages/managing/operating/metrics.adoc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 4d52f4145c5d..2b3e2b75efb4 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1114,6 +1114,9 @@ ran on the node in seconds |SkippedTokenRangesCount |Gauge |Number of token ranges skipped on the node +|SkippedTablesCount |Gauge |Number of tables skipped +on the node + |TotalMVTablesConsideredForRepair |Gauge |Number of materialized views considered on the node From 01a68e5c61692e0fc44083da1c2ff600c4f7a643 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 14 Oct 2024 18:32:40 -0700 Subject: [PATCH 044/257] Fix the failed StorageServiceServerTest test cases --- .../autorepair/SSTableRepairedAtTest.java | 171 ++++++++++++++++++ .../service/StorageServiceServerTest.java | 125 ------------- 2 files changed, 171 insertions(+), 125 deletions(-) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java diff --git a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java new file mode 100644 index 000000000000..bd14eea805b3 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.net.UnknownHostException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.service.StorageService; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + + +public class SSTableRepairedAtTest extends CQLTester +{ + public static final String TEST_KEYSPACE = "test_keyspace"; + public static ColumnFamilyStore table1; + public static ColumnFamilyStore table2; + + @BeforeClass + public static void setUp() throws ConfigurationException, UnknownHostException + { + requireNetwork(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); + DatabaseDescriptor.setCDCEnabled(false); + } + + @Before + public void clearData() + { + 
SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", TEST_KEYSPACE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (key text, val text, primary key(key))", TEST_KEYSPACE, "table1")); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (key text, val text, primary key(key))", TEST_KEYSPACE, "table2")); + + Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table1").truncateBlocking(); + Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table2").truncateBlocking(); + + table1 = Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table1"); + assert table1 != null; + table2 = Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table2"); + assert table2 != null; + } + @Test + public void testGetTablesForKeyspace() + { + List result = StorageService.instance.getTablesForKeyspace(TEST_KEYSPACE); + + assertEquals(Arrays.asList(table1.name, table2.name), result.stream().sorted().collect(Collectors.toList())); + } + + @Test + public void testGetTablesForKeyspaceNotFound() + { + String missingKeyspace = "MISSING_KEYSPACE"; + try + { + StorageService.instance.getTablesForKeyspace(missingKeyspace); + fail("Expected an AssertionError to be thrown"); + } + catch (AssertionError e) + { + assertEquals("Unknown keyspace " + missingKeyspace, e.getMessage()); + } + } + + @Test + public void testMutateSSTableRepairedStateTableNotFound() + { + try + { + StorageService.instance.mutateSSTableRepairedState(true, false, TEST_KEYSPACE, Arrays.asList("MISSING_TABLE")); + fail("Expected an InvalidRequestException to be thrown"); + } + catch (InvalidRequestException e) + { + // Test passed + } + } + + @Test + public void testMutateSSTableRepairedStateTablePreview() + { + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(1, 
table1.getLiveSSTables().size()); + + List result = StorageService.instance.mutateSSTableRepairedState(true, true, TEST_KEYSPACE, Arrays.asList(table1.name)); + + assertEquals(1, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } + + @Test + public void testMutateSSTableRepairedStateTableRepaired() + { + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(2, table1.getLiveSSTables().size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + }); + + List result = StorageService.instance.mutateSSTableRepairedState(true, false, TEST_KEYSPACE, Arrays.asList(table1.name)); + + assertEquals(2, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertTrue(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } + + @Test + public void testMutateSSTableRepairedStateTableUnrepaired() throws Exception + { + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + table1.getCompactionStrategyManager().mutateRepaired(table1.getLiveSSTables(), 1, null, false); + assertEquals(2, table1.getLiveSSTables().stream().filter(SSTableReader::isRepaired).count()); + + List result = StorageService.instance.mutateSSTableRepairedState(false, false, TEST_KEYSPACE, Arrays.asList(table1.name)); + + assertEquals(2, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + 
assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java index aaa964caa0f9..9a6a0bda404d 100644 --- a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java +++ b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java @@ -31,13 +31,6 @@ import java.util.UUID; import com.google.common.collect.Sets; -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.Keyspace; - import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -55,8 +48,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.exceptions.InvalidRequestException; -import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.WithPartitioner; import org.apache.cassandra.schema.KeyspaceMetadata; @@ -76,17 +67,10 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.GOSSIP_DISABLE_THREAD_VALIDATION; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - public class StorageServiceServerTest { - public static final String keyspace = "test_keyspace"; - public static ColumnFamilyStore table1; - public static ColumnFamilyStore table2; - static final String DC1 = "DC1"; static final String DC2 = "DC2"; static final String RACK = "rack1"; @@ -111,21 +95,6 @@ public static void setUp() throws ConfigurationException, UnknownHostException id5 = InetAddressAndPort.getByName("127.0.0.5"); 
registerNodes(); ServerTestUtils.markCMS(); - - SchemaLoader.createKeyspace(keyspace, KeyspaceParams.simple(1), - SchemaLoader.standardCFMD(keyspace, "table1").build(), - SchemaLoader.standardCFMD(keyspace, "table2").build()); - table1 = Keyspace.open(keyspace).getColumnFamilyStore("table1"); - assert table1 != null; - table2 = Keyspace.open(keyspace).getColumnFamilyStore("table2"); - assert table2 != null; - } - - @Before - public void clearData() - { - table1.truncateBlocking(); - table2.truncateBlocking(); } private static void registerNodes() @@ -634,7 +603,6 @@ public void testAuditLogEnableLoggerTransitions() throws Exception assertTrue(AuditLogManager.instance.isEnabled()); StorageService.instance.disableAuditLog(); } - /** Create a new AuditLogOptions instance with the log dir set appropriately to a temp dir for unit testing. */ @@ -648,97 +616,4 @@ private static AuditLogOptions getBaseAuditLogOptions() throws IOException return options; } - - @Test - public void testGetTablesForKeyspace() - { - List result = StorageService.instance.getTablesForKeyspace(keyspace); - - assertEquals(Arrays.asList(table1.name, table2.name), result.stream().sorted().collect(Collectors.toList())); - } - - @Test - public void testGetTablesForKeyspaceNotFound() - { - String missingKeyspace = "MISSING_KEYSPACE"; - try - { - StorageService.instance.getTablesForKeyspace(missingKeyspace); - fail("Expected an AssertionError to be thrown"); - } - catch (AssertionError e) - { - assertEquals("Unknown keyspace " + missingKeyspace, e.getMessage()); - } - } - - @Test - public void testMutateSSTableRepairedStateTableNotFound() - { - try - { - StorageService.instance.mutateSSTableRepairedState(true, false, keyspace, Arrays.asList("MISSING_TABLE")); - fail("Expected an InvalidRequestException to be thrown"); - } - catch (InvalidRequestException e) - { - // Test passed - } - } - - @Test - public void testMutateSSTableRepairedStateTablePreview() - { - SchemaLoader.insertData(keyspace, 
table1.name, 0, 1); - table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); - assertEquals(1, table1.getLiveSSTables().size()); - - List result = StorageService.instance.mutateSSTableRepairedState(true, true, keyspace, Arrays.asList(table1.name)); - - assertEquals(1, result.size()); - table1.getLiveSSTables().forEach(sstable -> { - assertFalse(sstable.isRepaired()); - assertTrue(result.contains(sstable.descriptor.baseFile().name())); - }); - } - - @Test - public void testMutateSSTableRepairedStateTableRepaired() - { - SchemaLoader.insertData(keyspace, table1.name, 0, 1); - table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); - SchemaLoader.insertData(keyspace, table1.name, 0, 1); - table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); - assertEquals(2, table1.getLiveSSTables().size()); - table1.getLiveSSTables().forEach(sstable -> { - assertFalse(sstable.isRepaired()); - }); - - List result = StorageService.instance.mutateSSTableRepairedState(true, false, keyspace, Arrays.asList(table1.name)); - - assertEquals(2, result.size()); - table1.getLiveSSTables().forEach(sstable -> { - assertTrue(sstable.isRepaired()); - assertTrue(result.contains(sstable.descriptor.baseFile().name())); - }); - } - - @Test - public void testMutateSSTableRepairedStateTableUnrepaired() throws Exception - { - SchemaLoader.insertData(keyspace, table1.name, 0, 1); - table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); - SchemaLoader.insertData(keyspace, table1.name, 0, 1); - table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); - table1.getCompactionStrategyManager().mutateRepaired(table1.getLiveSSTables(), 1, null, false); - assertEquals(2, table1.getLiveSSTables().stream().filter(SSTableReader::isRepaired).count()); - - List result = StorageService.instance.mutateSSTableRepairedState(false, false, keyspace, Arrays.asList(table1.name)); - - assertEquals(2, result.size()); - table1.getLiveSSTables().forEach(sstable -> 
{ - assertFalse(sstable.isRepaired()); - assertTrue(result.contains(sstable.descriptor.baseFile().name())); - }); - } } From 9c901e0bcb26d7b7d8300da5db67215fc5a6cf29 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 15 Oct 2024 14:50:14 -0700 Subject: [PATCH 045/257] Formatting: Add a new line towards the end SystemDistributedKeyspace.java --- .../org/apache/cassandra/schema/SystemDistributedKeyspace.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index 0eaa16aa540c..1759e18e822e 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -416,4 +416,4 @@ private enum BuildStatus { UNKNOWN, STARTED, SUCCESS } -} \ No newline at end of file +} From 490ac25e1f0f5a9d204b78acf8abf53e7afff328 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 15 Oct 2024 14:58:36 -0700 Subject: [PATCH 046/257] Formatting: Add a new line towards the end IAutoRepairTokenRangeSplitter.java --- .../repair/autorepair/IAutoRepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index dd02c4d4f7b6..169600eca405 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -96,4 +96,4 @@ public String toString() '}'; } } -} \ No newline at end of file +} From ad9ef82e2f2ec7416665c6e885632afcf6fd5a0b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 15 Oct 2024 16:27:41 -0700 Subject: [PATCH 047/257] Formatting: YamlConfigurationLoader.java & CassandraStreamReceiver.java --- 
.../apache/cassandra/config/YamlConfigurationLoader.java | 1 + .../cassandra/db/streaming/CassandraStreamReceiver.java | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java b/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java index 0accb0d75f9f..f37a42e8fa54 100644 --- a/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java +++ b/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java @@ -461,3 +461,4 @@ public static LoaderOptions getDefaultLoaderOptions() return loaderOptions; } } + diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 3dd3bc40f179..8c11a0b7fbe5 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -23,13 +23,9 @@ import java.util.List; import java.util.Set; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; -import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; -import org.apache.cassandra.io.sstable.SSTable; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +36,7 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; import org.apache.cassandra.db.lifecycle.LifecycleTransaction; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.ThrottledUnfilteredIterator; @@ -49,6 +46,7 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; +import org.apache.cassandra.io.sstable.SSTable; import 
org.apache.cassandra.io.sstable.SSTableMultiWriter; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.accord.AccordService; @@ -208,7 +206,6 @@ private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) * For CDC-enabled tables and write path for CDC is enabled, we want to ensure that the mutations are * run through the CommitLog, so they can be archived by the CDC process on discard. */ - @VisibleForTesting public boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) From be0377b0e1ae479ec36c5b67fd043ad9ce126e52 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 17 Oct 2024 16:47:38 -0700 Subject: [PATCH 048/257] Revert .circleci/config.yaml settings --- .circleci/config.yml | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9da3ea0f9aea..864919b8f418 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -220,7 +220,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -338,7 +338,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -625,7 +625,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -3788,7 +3788,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -4100,7 +4100,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - run: name: Log Environment Information @@ -4413,7 +4413,7 @@ jobs: resource_class: 
medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -4530,7 +4530,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -4781,7 +4781,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -4885,7 +4885,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -5046,7 +5046,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -5160,7 +5160,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -5278,7 +5278,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -6444,7 +6444,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -6997,7 +6997,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -7502,7 +7502,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -7843,7 +7843,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: 
/home/cassandra @@ -7914,7 +7914,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - run: name: Log Environment Information @@ -8196,7 +8196,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra @@ -8833,7 +8833,7 @@ jobs: resource_class: medium working_directory: ~/ shell: /bin/bash -eo pipefail -l - parallelism: 4 + parallelism: 1 steps: - attach_workspace: at: /home/cassandra From e1724de54dfe25724c8dc7dfeacbe4f762105980 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 27 Oct 2024 15:58:02 -0700 Subject: [PATCH 049/257] Code formatting: Remove Group from naming --- .../apache/cassandra/service/AutoRepairService.java | 8 ++++---- .../cassandra/service/AutoRepairServiceMBean.java | 4 ++-- src/java/org/apache/cassandra/tools/NodeProbe.java | 8 ++++---- .../cassandra/tools/nodetool/SetAutoRepairConfig.java | 10 +++++----- .../cassandra/service/AutoRepairServiceSetterTest.java | 4 ++-- .../tools/nodetool/SetAutoRepairConfigTest.java | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index d2395f1157b5..87e01d10bcfb 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -162,15 +162,15 @@ public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryToken } @Override - public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup) + public void setParallelRepairPercentage(RepairType repairType, int percentage) { - config.setParallelRepairPercentage(repairType, percentageInGroup); + config.setParallelRepairPercentage(repairType, percentage); } @Override - public void 
setParallelRepairCountInGroup(RepairType repairType, int countInGroup) + public void setParallelRepairCount(RepairType repairType, int count) { - config.setParallelRepairCount(repairType, countInGroup); + config.setParallelRepairCount(repairType, count); } public void setMVRepairEnabled(RepairType repairType, boolean enabled) diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 5e62e5630043..b6f08715d3e4 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -56,9 +56,9 @@ public interface AutoRepairServiceMBean public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly); - public void setParallelRepairPercentageInGroup(RepairType repairType, int percentageInGroup); + public void setParallelRepairPercentage(RepairType repairType, int percentage); - public void setParallelRepairCountInGroup(RepairType repairType, int countInGroup); + public void setParallelRepairCount(RepairType repairType, int count); public void setMVRepairEnabled(RepairType repairType, boolean enabled); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 4a1d6bdecb07..643bb1b6b83b 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2632,12 +2632,12 @@ public void setAutoRepairIgnoreDCs(AutoRepairConfig.RepairType repairType, Set validateLocalGroupHosts(String paramVal) + private Set retrieveHosts(String paramVal) { Set hosts = new HashSet<>(); for (String host : Splitter.on(',').split(paramVal)) diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 271d28b926de..58102ed7235b 100644 --- 
a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -73,8 +73,8 @@ public static Collection testCases() { forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentageInGroup, config::getParallelRepairPercentage), - forEachRepairType(700, AutoRepairService.instance::setParallelRepairCountInGroup, config::getParallelRepairCount), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, AutoRepairServiceSetterTest::isLocalHostForceRepair) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 2a69cefcace9..5245062ad815 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -259,8 +259,8 @@ public static Collection testCases() forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, 
times(1)).setRepairSSTableCountHigherThreshold(type, 4)), forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type, "5s")), forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type, true)), - forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCountInGroup(type, 6)), - forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentageInGroup(type, 7)), + forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type, 6)), + forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type, 7)), forEachRepairType("mv_repair_enabled", "true", (type) -> verify(probe, times(1)).setMVRepairEnabled(type, true)), forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) ).flatMap(Function.identity()).collect(Collectors.toList()); From c3f7c68047f22e9bdd1cfade2100b5e603d50e6f Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 27 Oct 2024 20:57:39 -0700 Subject: [PATCH 050/257] Enable repair scheduler through nodetool --- .../repair/autorepair/AutoRepairConfig.java | 8 +++++++- .../cassandra/service/AutoRepairService.java | 6 ++++++ .../service/AutoRepairServiceMBean.java | 2 ++ .../org/apache/cassandra/tools/NodeProbe.java | 5 +++++ .../tools/nodetool/SetAutoRepairConfig.java | 10 ++++++++-- .../tools/nodetool/SetAutoRepairConfigTest.java | 16 ++++++++++++++++ 6 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 30433a1ad7ff..a80a875125a9 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -39,7 +39,7 @@ public class AutoRepairConfig implements Serializable // enable/disable auto repair globally, overrides all other settings. Cannot be modified dynamically. // if it is set to false, then no repair will be scheduled, including full and incremental repairs by this framework. // if it is set to true, then this repair scheduler will consult another config available for each RepairType, and based on that config, it will schedule repairs. - public final Boolean enabled; + public volatile Boolean enabled; // the interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if // none is going, then check if it's time to schedule or wait public final DurationSpec.IntSecondsBound repair_check_interval = new DurationSpec.IntSecondsBound("5m"); @@ -108,6 +108,12 @@ public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferIn return history_clear_delete_hosts_buffer_interval; } + public void startScheduler() + { + enabled = true; + AutoRepair.instance.setup(); + } + public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) { history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 87e01d10bcfb..f80ce1b551bf 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -120,6 +120,12 @@ public void setRepairMinInterval(RepairType repairType, String minRepairInterval config.setRepairMinInterval(repairType, minRepairInterval); } + @Override + public void startScheduler() + { + config.startScheduler(); + } + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String 
duration) { config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index b6f08715d3e4..121c9a480303 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -42,6 +42,8 @@ public interface AutoRepairServiceMBean public void setRepairMinInterval(RepairType repairType, String minRepairInterval); + void startScheduler(); + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); public void setAutoRepairMaxRetriesCount(int retries); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 643bb1b6b83b..5417c127e5e0 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2607,6 +2607,11 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); } + public void startScheduler() + { + autoRepairProxy.startScheduler(); + } + public void setAutoRepairMaxRetriesCount(int retries) { autoRepairProxy.setAutoRepairMaxRetriesCount(retries); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index cdd3ca58a8fc..2929c944442a 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -43,7 +43,7 @@ public class SetAutoRepairConfig extends NodeToolCmd @VisibleForTesting @Arguments(title = " ", usage = " ", description = "autorepair param and value.\nPossible autorepair parameters are as following: " + - 
"[number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + + "[start_scheduler|number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries|repair_retry_backoff|repair_session_timeout]", @@ -64,7 +64,7 @@ public void execute(NodeProbe probe) String paramType = args.get(0); String paramVal = args.get(1); - if (!probe.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (!probe.getAutoRepairConfig().isAutoRepairSchedulingEnabled() && !paramType.equalsIgnoreCase("start_scheduler")) { out.println("Auto-repair is not enabled"); return; @@ -73,6 +73,12 @@ public void execute(NodeProbe probe) // options that do not require --repair-type option switch (paramType) { + case "start_scheduler": + if (Boolean.parseBoolean(paramVal)) + { + probe.startScheduler(); + } + return; case "history_clear_delete_hosts_buffer_interval": probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); return; diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 5245062ad815..6e2d79def026 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -115,6 +115,22 @@ public void testRetryBackoffInSec() verify(probe, times(1)).setAutoRepairRetryBackoff("3s"); } + + @Test + public void testStartScheduler() + { + cmd.args = ImmutableList.of("start_scheduler", "false"); + + cmd.execute(probe); + + verify(probe, times(0)).startScheduler(); + + cmd.args = ImmutableList.of("start_scheduler", "true"); + + cmd.execute(probe); + + verify(probe, 
times(1)).startScheduler(); + } } @RunWith(Parameterized.class) From fdaed595d2086449d8d8f475f0d28872dc94ad46 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 29 Oct 2024 11:44:27 -0700 Subject: [PATCH 051/257] Increment system_distributed GENERATION --- .../org/apache/cassandra/schema/SystemDistributedKeyspace.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index 1759e18e822e..f9c023e33b32 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -85,7 +85,7 @@ private SystemDistributedKeyspace() * * // TODO: TCM - how do we evolve these tables? */ - public static final long GENERATION = 6; + public static final long GENERATION = 7; public static final String REPAIR_HISTORY = "repair_history"; From e2031a4fc2dc4be22a5fcc02ee2249643c833a3c Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 4 Nov 2024 10:18:47 -0800 Subject: [PATCH 052/257] Option to disable write path during streaming for MV enabled tables; similar to CASSANDRA-17666 --- conf/cassandra.yaml | 7 +++++++ src/java/org/apache/cassandra/config/Config.java | 4 ++++ .../apache/cassandra/config/DatabaseDescriptor.java | 10 ++++++++++ .../db/streaming/CassandraStreamReceiver.java | 8 +++++--- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index fe80fda1645d..c255138a02df 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2184,6 +2184,13 @@ report_unconfirmed_repaired_data_mismatches: false # Materialized views are considered experimental and are not recommended for production use. materialized_views_enabled: false +# Specify whether Materialized View mutations are replayed through the write path on streaming, e.g. repair. 
+# When enabled, Materialized View data streamed to the destination node will be written into commit log first. When setting to false, +# the streamed Materialized View data is written into SSTables just the same as normal streaming. The default is true. +# If this is set to false, streaming will be considerably faster however it's possible that, in extreme situations +# (losing > quorum # nodes in a replica set), you may have data in your SSTables that never makes it to the Materialized View. +# materialized_views_on_repair_enabled: true + # Enables SASI index creation on this node. # SASI indexes are considered experimental and are not recommended for production use. sasi_indexes_enabled: false diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 3aaec1b8a8ab..9a0c45622167 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -630,6 +630,10 @@ public static class SSTableConfig @Replaces(oldName = "enable_materialized_views", converter = Converters.IDENTITY, deprecated = true) public boolean materialized_views_enabled = false; + // When true, materialized views data in SSTable go through commit logs during internodes streaming, e.g. repair + // When false, it behaves the same as normal streaming. 
+ public volatile boolean materialized_views_on_repair_enabled = true; + @Replaces(oldName = "enable_transient_replication", converter = Converters.IDENTITY, deprecated = true) public boolean transient_replication_enabled = false; diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 5a8077e916d2..23e48c40ebc6 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -4566,6 +4566,16 @@ public static void setMaterializedViewsEnabled(boolean enableMaterializedViews) conf.materialized_views_enabled = enableMaterializedViews; } + public static boolean isMaterializedViewsOnRepairEnabled() + { + return conf.materialized_views_on_repair_enabled; + } + + public static void setMaterializedViewsOnRepairEnabled(boolean val) + { + conf.materialized_views_on_repair_enabled = val; + } + public static boolean getSASIIndexesEnabled() { return conf.sasi_indexes_enabled; diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 8c11a0b7fbe5..3d63ca016d85 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -26,6 +26,10 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.io.sstable.SSTable; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,7 +40,6 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; import 
org.apache.cassandra.db.lifecycle.LifecycleTransaction; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.ThrottledUnfilteredIterator; @@ -46,7 +49,6 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; -import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableMultiWriter; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.accord.AccordService; @@ -210,7 +212,7 @@ public boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) || cfs.streamToMemtable() - || (session.streamOperation().requiresViewBuild() && hasViews(cfs)); + || (session.streamOperation().requiresViewBuild() && hasViews(cfs) && DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()); } private void sendThroughWritePath(ColumnFamilyStore cfs, Collection readers) From 5c27c935c42c8748c3b8ad3d2536f2da581c8122 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 4 Nov 2024 11:00:18 -0800 Subject: [PATCH 053/257] cr from tolbertam - replace MV/CDC repair replay JVM properties to yaml configs --- .../config/CassandraRelevantProperties.java | 7 ------- .../cassandra/service/AutoRepairService.java | 7 +++---- .../cassandra/streaming/StreamOperation.java | 4 +--- .../test/repair/AutoRepairSchedulerTest.java | 12 ++---------- .../autorepair/AutoRepairParameterizedTest.java | 6 +++--- .../repair/autorepair/AutoRepairTest.java | 15 +++------------ .../service/AutoRepairServiceBasicTest.java | 9 +++------ .../service/AutoRepairServiceSetterTest.java | 3 +-- 8 files changed, 16 insertions(+), 47 deletions(-) diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index e44ba180f146..7459fecb8c34 100644 --- 
a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -550,14 +550,7 @@ public enum CassandraRelevantProperties STORAGE_HOOK("cassandra.storage_hook"), STORAGE_PORT("cassandra.storage_port"), STREAMING_HISTOGRAM_ROUND_SECONDS("cassandra.streaminghistogram.roundseconds", "60"), - - /** - * If set to true, mutations streamed during anti-entropy repair will be replayed via the regular write path for associated views. - */ - STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR("cassandra.streaming.requires_view_build_during_repair", "true"), - STREAMING_SESSION_PARALLELTRANSFERS("cassandra.streaming.session.parallelTransfers"), - STREAM_HOOK("cassandra.stream_hook"), /** Platform word size sun.arch.data.model. Examples: "32", "64", "unknown"*/ SUN_ARCH_DATA_MODEL("sun.arch.data.model"), diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index f80ce1b551bf..7a8342c2a7a4 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.service; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; @@ -65,11 +64,11 @@ public void checkCanRun(RepairType repairType) if (repairType != RepairType.incremental) return; - if (CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.getBoolean()) - throw new ConfigurationException("Cannot run incremental repair while materialized view replay is enabled."); + if (DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()) + throw new ConfigurationException("Cannot run incremental repair while materialized view replay is enabled. 
Set materialized_views_on_repair_enabled to false."); if (DatabaseDescriptor.isCDCOnRepairEnabled()) - throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled."); + throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled. Set cdc_on_repair_enabled to false."); } @Override diff --git a/src/java/org/apache/cassandra/streaming/StreamOperation.java b/src/java/org/apache/cassandra/streaming/StreamOperation.java index 2ed83addaf84..b1c5908f7fe8 100644 --- a/src/java/org/apache/cassandra/streaming/StreamOperation.java +++ b/src/java/org/apache/cassandra/streaming/StreamOperation.java @@ -17,8 +17,6 @@ */ package org.apache.cassandra.streaming; -import org.apache.cassandra.config.CassandraRelevantProperties; - public enum StreamOperation { OTHER("Other", true, false, false), // Fallback to avoid null types when deserializing from string @@ -28,7 +26,7 @@ public enum StreamOperation BOOTSTRAP("Bootstrap", false, true, false), REBUILD("Rebuild", false, true, false), BULK_LOAD("Bulk Load", true, false, false), - REPAIR("Repair", CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.getBoolean(), false, true); + REPAIR("Repair", true, false, true); private final String description; private final boolean requiresViewBuild; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 43c29f14e0de..9b583f006507 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -26,10 +26,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Uninterruptibles; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; 
import org.apache.cassandra.schema.SystemDistributedKeyspace; -import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -53,8 +51,6 @@ public class AutoRepairSchedulerTest extends TestBaseImpl @BeforeClass public static void init() throws IOException { - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); - // Define the expected date format pattern String pattern = "EEE MMM dd HH:mm:ss z yyyy"; // Create SimpleDateFormat object with the given pattern @@ -85,12 +81,6 @@ public static void init() throws IOException cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); } - @AfterClass - public static void afterClass() - { - System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); - } - @Test public void testScheduler() throws ParseException { @@ -101,6 +91,8 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); AutoRepairService.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(false); AutoRepair.instance.setup(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 103b3457f919..c7533c6dba0e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -31,7 +31,6 @@ import com.google.common.collect.Sets; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.statements.schema.TableAttributes; import org.apache.cassandra.dht.Range; @@ -134,7 +133,8 @@ public 
void setup() QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); MockitoAnnotations.initMocks(this); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); @@ -673,7 +673,7 @@ public void testRepairSuccessAfterRetry() public void testRepairThrowsForIRWithMVReplay() { AutoRepair.instance.setup(); - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); if (repairType == AutoRepairConfig.RepairType.incremental) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index be2b6cc47d75..df9f105b615f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -21,8 +21,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.cassandra.config.CassandraRelevantProperties; -import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -57,19 +55,13 @@ public static void setupClass() throws Exception public void setup() { AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); 
DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); AutoRepairService.setup(); } - @After - public void after() - { - System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); - } - @Test public void testSetup() { @@ -109,9 +101,8 @@ public void testSafeGuardSetupCall() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithCDCReplay() { - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); @@ -123,8 +114,8 @@ public void testSetupFailsWhenIREnabledWithCDCReplay() public void testSetupFailsWhenIREnabledWithMVReplay() { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(true); DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); AutoRepair instance = new AutoRepair(); instance.setup(); } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 704d09fc7dbc..054f136dad75 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.junit.Before; import org.junit.Test; @@ -35,8 +34,8 @@ public class AutoRepairServiceBasicTest extends CQLTester { @Before public void setUp() { - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); 
DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsEnabled(false); DatabaseDescriptor.setCDCEnabled(false); config = new AutoRepairConfig(); @@ -91,8 +90,7 @@ public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { @Test(expected = ConfigurationException.class) public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { autoRepairService.config = new AutoRepairConfig(true); - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(true); - + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); } @@ -100,8 +98,7 @@ public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(true); - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); - + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 58102ed7235b..f34e1f0a7071 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -19,7 +19,6 @@ package org.apache.cassandra.service; import com.google.common.collect.ImmutableSet; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; @@ -125,7 +124,7 @@ public void prepare() { @Test public void 
testSettersTest() { - CassandraRelevantProperties.STREAMING_REQUIRES_VIEW_BUILD_DURING_REPAIR.setBoolean(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); setter.accept(repairType, arg); assertEquals(arg, getter.apply(repairType)); From 096b8983c96bf7bb621c489e4c464e865812bbcb Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 5 Nov 2024 17:53:33 -0800 Subject: [PATCH 054/257] ant checkstyle: toLowerCase-->toLowerCaseLocalized --- src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java | 5 +++-- src/java/org/apache/cassandra/schema/AutoRepairParams.java | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index bdb6deacf6ad..b097dd3414c4 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -25,6 +25,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepair; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; +import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** * Metrics related to AutoRepair. 
@@ -163,7 +164,7 @@ protected static class AutoRepairMetricsFactory implements MetricNameFactory protected AutoRepairMetricsFactory(RepairType repairType) { - this.repairType = repairType.toString().toLowerCase(); + this.repairType = toLowerCaseLocalized(repairType.toString()); } @Override @@ -178,7 +179,7 @@ public CassandraMetricsRegistry.MetricName createMetricName(String metricName) StringBuilder scope = new StringBuilder(); scope.append("repairType=").append(repairType); - return new CassandraMetricsRegistry.MetricName(DefaultNameFactory.GROUP_NAME, TYPE.toLowerCase(), + return new CassandraMetricsRegistry.MetricName(DefaultNameFactory.GROUP_NAME, toLowerCaseLocalized(TYPE), metricName, scope.toString(), mbeanName.toString()); } } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index ae97e562635f..ea3802db93d2 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -29,6 +29,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import static java.lang.String.format; +import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; public final class AutoRepairParams { @@ -39,7 +40,7 @@ public enum Option @Override public String toString() { - return name().toLowerCase(); + return toLowerCaseLocalized(name()); } } @@ -67,7 +68,7 @@ public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Ma { for (Map.Entry entry : options.entrySet()) { - if (!Option.ENABLED.toString().equals(entry.getKey().toLowerCase())) + if (!Option.ENABLED.toString().equals(toLowerCaseLocalized(entry.getKey()))) { throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); } From d98cfa1d88c10b54e648e8274fab4594d71105c6 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 5 Nov 2024 19:11:26 -0800 Subject: [PATCH 055/257] code formatting: 
DatabaseDescriptor.java --- src/java/org/apache/cassandra/config/DatabaseDescriptor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 23e48c40ebc6..de3e13099b21 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -61,6 +61,7 @@ import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.google.common.util.concurrent.RateLimiter; + import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; From c9142fa737fa38616e59285fbbc46aba655c38bb Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 20 Nov 2024 21:01:04 -0800 Subject: [PATCH 056/257] Rename AutoRepair --> UnifiedRepair --- .../pages/managing/operating/metrics.adoc | 14 +- .../org/apache/cassandra/config/Config.java | 4 +- .../cassandra/config/DatabaseDescriptor.java | 6 +- .../statements/schema/TableAttributes.java | 8 +- .../metrics/CassandraMetricsRegistry.java | 2 +- ...Metrics.java => UnifiedRepairMetrics.java} | 44 +- ....java => UnifiedRepairMetricsManager.java} | 10 +- .../DefaultUnifiedRepairTokenSplitter.java} | 12 +- .../IUnifiedRepairTokenRangeSplitter.java} | 10 +- .../UnifiedRepair.java} | 120 +++--- .../UnifiedRepairConfig.java} | 46 +- .../UnifiedRepairState.java} | 41 +- .../UnifiedRepairUtils.java} | 186 ++++----- .../cassandra/schema/SchemaKeyspace.java | 10 +- .../schema/SystemDistributedKeyspace.java | 44 +- .../apache/cassandra/schema/TableParams.java | 40 +- ...irParams.java => UnifiedRepairParams.java} | 30 +- .../cassandra/service/CassandraDaemon.java | 2 +- .../cassandra/service/StorageService.java | 14 +- ...Service.java => UnifiedRepairService.java} | 50 +-- ...an.java => UnifiedRepairServiceMBean.java} | 20 +- .../tcm/sequences/BootstrapAndJoin.java | 4 +- 
.../tcm/sequences/BootstrapAndReplace.java | 4 +- .../tcm/sequences/ReplaceSameAddress.java | 4 +- .../org/apache/cassandra/tools/NodeProbe.java | 94 ++--- .../org/apache/cassandra/tools/NodeTool.java | 6 +- ...onfig.java => GetUnifiedRepairConfig.java} | 22 +- ...onfig.java => SetUnifiedRepairConfig.java} | 28 +- ...irStatus.java => UnifiedRepairStatus.java} | 14 +- .../apache/cassandra/utils/FBUtilities.java | 12 +- ...t.java => UnifiedRepairSchedulerTest.java} | 34 +- test/unit/org/apache/cassandra/Util.java | 15 +- .../config/DatabaseDescriptorRefTest.java | 16 +- .../config/YamlConfigurationLoaderTest.java | 14 +- .../SSTableRepairedAtTest.java | 6 +- .../UnifiedRepairConfigTest.java} | 78 ++-- ...efaultTokenSplitterParameterizedTest.java} | 28 +- .../UnifiedRepairKeyspaceTest.java} | 10 +- .../UnifiedRepairParameterizedTest.java} | 394 +++++++++--------- .../UnifiedRepairStateFactoryTest.java} | 12 +- .../UnifiedRepairStateTest.java} | 84 ++-- .../UnifiedRepairTest.java} | 36 +- .../UnifiedRepairUtilsTest.java} | 174 ++++---- .../service/AutoRepairServiceBasicTest.java | 118 ------ .../UnifiedRepairServiceBasicTest.java | 118 ++++++ ...> UnifiedRepairServiceRepairTypeTest.java} | 26 +- ...va => UnifiedRepairServiceSetterTest.java} | 60 +-- .../cassandra/tools/JMXStandardsTest.java | 6 +- ...t.java => SetUnifiedRepairConfigTest.java} | 64 +-- ...Test.java => UnifiedRepairStatusTest.java} | 24 +- 50 files changed, 1114 insertions(+), 1104 deletions(-) rename src/java/org/apache/cassandra/metrics/{AutoRepairMetrics.java => UnifiedRepairMetrics.java} (76%) rename src/java/org/apache/cassandra/metrics/{AutoRepairMetricsManager.java => UnifiedRepairMetricsManager.java} (70%) rename src/java/org/apache/cassandra/repair/{autorepair/DefaultAutoRepairTokenSplitter.java => unifiedrepair/DefaultUnifiedRepairTokenSplitter.java} (82%) rename src/java/org/apache/cassandra/repair/{autorepair/IAutoRepairTokenRangeSplitter.java => 
unifiedrepair/IUnifiedRepairTokenRangeSplitter.java} (87%) rename src/java/org/apache/cassandra/repair/{autorepair/AutoRepair.java => unifiedrepair/UnifiedRepair.java} (78%) rename src/java/org/apache/cassandra/repair/{autorepair/AutoRepairConfig.java => unifiedrepair/UnifiedRepairConfig.java} (91%) rename src/java/org/apache/cassandra/repair/{autorepair/AutoRepairState.java => unifiedrepair/UnifiedRepairState.java} (86%) rename src/java/org/apache/cassandra/repair/{autorepair/AutoRepairUtils.java => unifiedrepair/UnifiedRepairUtils.java} (83%) rename src/java/org/apache/cassandra/schema/{AutoRepairParams.java => UnifiedRepairParams.java} (74%) rename src/java/org/apache/cassandra/service/{AutoRepairService.java => UnifiedRepairService.java} (72%) rename src/java/org/apache/cassandra/service/{AutoRepairServiceMBean.java => UnifiedRepairServiceMBean.java} (74%) rename src/java/org/apache/cassandra/tools/nodetool/{GetAutoRepairConfig.java => GetUnifiedRepairConfig.java} (82%) rename src/java/org/apache/cassandra/tools/nodetool/{SetAutoRepairConfig.java => SetUnifiedRepairConfig.java} (83%) rename src/java/org/apache/cassandra/tools/nodetool/{AutoRepairStatus.java => UnifiedRepairStatus.java} (82%) rename test/distributed/org/apache/cassandra/distributed/test/repair/{AutoRepairSchedulerTest.java => UnifiedRepairSchedulerTest.java} (83%) rename test/unit/org/apache/cassandra/repair/{autorepair => unifiedrepair}/SSTableRepairedAtTest.java (97%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairConfigTest.java => unifiedrepair/UnifiedRepairConfigTest.java} (77%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java => unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java} (84%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairKeyspaceTest.java => unifiedrepair/UnifiedRepairKeyspaceTest.java} (87%) rename 
test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairParameterizedTest.java => unifiedrepair/UnifiedRepairParameterizedTest.java} (52%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairStateFactoryTest.java => unifiedrepair/UnifiedRepairStateFactoryTest.java} (76%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairStateTest.java => unifiedrepair/UnifiedRepairStateTest.java} (72%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairTest.java => unifiedrepair/UnifiedRepairTest.java} (79%) rename test/unit/org/apache/cassandra/repair/{autorepair/AutoRepairUtilsTest.java => unifiedrepair/UnifiedRepairUtilsTest.java} (70%) delete mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java create mode 100644 test/unit/org/apache/cassandra/service/UnifiedRepairServiceBasicTest.java rename test/unit/org/apache/cassandra/service/{AutoRepairServiceRepairTypeTest.java => UnifiedRepairServiceRepairTypeTest.java} (72%) rename test/unit/org/apache/cassandra/service/{AutoRepairServiceSetterTest.java => UnifiedRepairServiceSetterTest.java} (59%) rename test/unit/org/apache/cassandra/tools/nodetool/{SetAutoRepairConfigTest.java => SetUnifiedRepairConfigTest.java} (79%) rename test/unit/org/apache/cassandra/tools/nodetool/{AutoRepairStatusTest.java => UnifiedRepairStatusTest.java} (76%) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 2b3e2b75efb4..594c31078d59 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1081,16 +1081,16 @@ partitions processed per logged batch partitions processed per unlogged batch |=== -== Automated Repair Metrics +== Unified Repair Metrics -Metrics specifc to automated repair. +Metrics specifc to unified repair. 
Reported name format: *Metric Name*:: -`org.apache.cassandra.metrics.AutoRepair.` +`org.apache.cassandra.metrics.UnifiedRepair.` *JMX MBean*:: -`org.apache.cassandra.metrics:type=AutoRepair name= repairType=` +`org.apache.cassandra.metrics:type=UnifiedRepair name= repairType=` [cols=",,",options="header",] |=== @@ -1121,15 +1121,15 @@ on the node views considered on the node |TotalDisabledRepairTables |Gauge |Number of tables on which -the automated repair has been disabled on the node +the unified repair has been disabled on the node |RepairTurnMyTurn |Counter |Represents the node's turn to repair |RepairTurnMyTurnDueToPriority |Counter |Represents the node's turn to repair -due to priority set in the automated repair +due to priority set in the unified repair |RepairTurnMyTurnForceRepair |Counter |Represents the node's turn to repair -due to force repair set in the automated repair +due to force repair set in the unified repair |=== diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 9a0c45622167..e7bbe79f385b 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -33,7 +33,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1001,7 +1001,7 @@ public static void setClientMode(boolean clientMode) public volatile boolean password_validator_reconfiguration_enabled = true; public volatile CustomGuardrailConfig password_validator = new CustomGuardrailConfig(); - public volatile AutoRepairConfig auto_repair = new AutoRepairConfig(); + public volatile UnifiedRepairConfig unified_repair = new UnifiedRepairConfig(); /** * The variants of paxos implementation and semantics supported by Cassandra. 
diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index de3e13099b21..d7bdd12bf5ac 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -114,7 +114,7 @@ import org.apache.cassandra.locator.ReconnectableSnitchHelper; import org.apache.cassandra.locator.SeedProvider; import org.apache.cassandra.locator.SnitchAdapter; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.apache.cassandra.security.AbstractCryptoProvider; import org.apache.cassandra.security.EncryptionContext; import org.apache.cassandra.security.JREProvider; @@ -5912,9 +5912,9 @@ public static boolean getAccordEphemeralReadEnabledEnabled() return conf.accord.ephemeralReadEnabled; } - public static AutoRepairConfig getAutoRepairConfig() + public static UnifiedRepairConfig getUnifiedRepairConfig() { - return conf.auto_repair; + return conf.unified_repair; } public static double getIncrementalRepairDiskHeadroomRejectRatio() diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index c8cfca9731ac..2eb78f59b153 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -27,8 +27,8 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.schema.AutoRepairParams; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.schema.UnifiedRepairParams; import 
org.apache.cassandra.schema.CachingParams; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.CompressionParams; @@ -199,10 +199,10 @@ private TableParams build(TableParams.Builder builder) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); if (hasOption(Option.REPAIR_FULL)) - builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, getMap(Option.REPAIR_FULL))); + builder.unifiedRepairFull(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.full, getMap(Option.REPAIR_FULL))); if (hasOption(Option.REPAIR_INCREMENTAL)) - builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); + builder.unifiedRepairIncremental(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); return builder.build(); } diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index 11b36c606a3a..0901fa9733fa 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -154,7 +154,7 @@ public class CassandraMetricsRegistry extends MetricRegistry .add(ThreadPoolMetrics.TYPE_NAME) .add(TrieMemtableMetricsView.TYPE_NAME) .add(UnweightedCacheMetrics.TYPE_NAME) - .add(AutoRepairMetrics.TYPE_NAME) + .add(UnifiedRepairMetrics.TYPE_NAME) .build(); } diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/UnifiedRepairMetrics.java similarity index 76% rename from src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java rename to src/java/org/apache/cassandra/metrics/UnifiedRepairMetrics.java index b097dd3414c4..d4929cbfefb0 100644 --- 
a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/UnifiedRepairMetrics.java @@ -20,19 +20,19 @@ import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; -import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepair; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** - * Metrics related to AutoRepair. + * Metrics related to UnifiedRepair. */ -public class AutoRepairMetrics +public class UnifiedRepairMetrics { - public static final String TYPE_NAME = "autorepair"; + public static final String TYPE_NAME = "unifiedrepair"; public Gauge repairsInProgress; public Gauge nodeRepairTimeInSec; public Gauge clusterRepairTimeInSec; @@ -47,15 +47,15 @@ public class AutoRepairMetrics public Gauge totalMVTablesConsideredForRepair; public Gauge totalDisabledRepairTables; - public AutoRepairMetrics(RepairType repairType) + public UnifiedRepairMetrics(RepairType repairType) { - AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); + UnifiedRepairMetricsFactory factory = new UnifiedRepairMetricsFactory(repairType); repairsInProgress = Metrics.register(factory.createMetricName("RepairsInProgress"), new Gauge() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).isRepairInProgress() ? 1 : 0; + return UnifiedRepair.instance.getRepairState(repairType).isRepairInProgress() ? 
1 : 0; } }); @@ -63,7 +63,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getNodeRepairTimeInSec(); + return UnifiedRepair.instance.getRepairState(repairType).getNodeRepairTimeInSec(); } }); @@ -71,7 +71,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getClusterRepairTimeInSec(); + return UnifiedRepair.instance.getRepairState(repairType).getClusterRepairTimeInSec(); } }); @@ -79,7 +79,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getSkippedTokenRangesCount(); + return UnifiedRepair.instance.getRepairState(repairType).getSkippedTokenRangesCount(); } }); @@ -87,7 +87,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getSkippedTablesCount(); + return UnifiedRepair.instance.getRepairState(repairType).getSkippedTablesCount(); } }); @@ -96,7 +96,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getLongestUnrepairedSec(); + return UnifiedRepair.instance.getRepairState(repairType).getLongestUnrepairedSec(); } }); @@ -104,7 +104,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getSucceededTokenRangesCount(); + return UnifiedRepair.instance.getRepairState(repairType).getSucceededTokenRangesCount(); } }); @@ -112,7 +112,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getFailedTokenRangesCount(); + return UnifiedRepair.instance.getRepairState(repairType).getFailedTokenRangesCount(); } }); @@ -124,7 +124,7 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getTotalMVTablesConsideredForRepair(); + return 
UnifiedRepair.instance.getRepairState(repairType).getTotalMVTablesConsideredForRepair(); } }); @@ -132,12 +132,12 @@ public Integer getValue() { public Integer getValue() { - return AutoRepair.instance.getRepairState(repairType).getTotalDisabledTablesRepairCount(); + return UnifiedRepair.instance.getRepairState(repairType).getTotalDisabledTablesRepairCount(); } }); } - public void recordTurn(AutoRepairUtils.RepairTurn turn) + public void recordTurn(UnifiedRepairUtils.RepairTurn turn) { switch (turn) { @@ -156,13 +156,13 @@ public void recordTurn(AutoRepairUtils.RepairTurn turn) } @VisibleForTesting - protected static class AutoRepairMetricsFactory implements MetricNameFactory + protected static class UnifiedRepairMetricsFactory implements MetricNameFactory { - private static final String TYPE = "AutoRepair"; + private static final String TYPE = "UnifiedRepair"; @VisibleForTesting protected final String repairType; - protected AutoRepairMetricsFactory(RepairType repairType) + protected UnifiedRepairMetricsFactory(RepairType repairType) { this.repairType = toLowerCaseLocalized(repairType.toString()); } diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java b/src/java/org/apache/cassandra/metrics/UnifiedRepairMetricsManager.java similarity index 70% rename from src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java rename to src/java/org/apache/cassandra/metrics/UnifiedRepairMetricsManager.java index e293945c9846..e8057595944d 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java +++ b/src/java/org/apache/cassandra/metrics/UnifiedRepairMetricsManager.java @@ -18,17 +18,17 @@ package org.apache.cassandra.metrics; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -public class AutoRepairMetricsManager +public class 
UnifiedRepairMetricsManager { - private static final Map metrics = new ConcurrentHashMap<>(); + private static final Map metrics = new ConcurrentHashMap<>(); - public static AutoRepairMetrics getMetrics(RepairType repairType) + public static UnifiedRepairMetrics getMetrics(RepairType repairType) { - return metrics.computeIfAbsent(repairType, k -> new AutoRepairMetrics(repairType)); + return metrics.computeIfAbsent(repairType, k -> new UnifiedRepairMetrics(repairType)); } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/unifiedrepair/DefaultUnifiedRepairTokenSplitter.java similarity index 82% rename from src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java rename to src/java/org/apache/cassandra/repair/unifiedrepair/DefaultUnifiedRepairTokenSplitter.java index 9a884f61c581..98dc09f8ad7d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/unifiedrepair/DefaultUnifiedRepairTokenSplitter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.ArrayList; @@ -23,20 +23,20 @@ import java.util.Collections; import java.util.List; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.service.StorageService; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.splitEvenly; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.splitEvenly; -public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter +public class DefaultUnifiedRepairTokenSplitter implements IUnifiedRepairTokenRangeSplitter { @Override - public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + public List getRepairAssignments(UnifiedRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); List repairAssignments = new ArrayList<>(); Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/unifiedrepair/IUnifiedRepairTokenRangeSplitter.java similarity index 87% rename from src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java rename to src/java/org/apache/cassandra/repair/unifiedrepair/IUnifiedRepairTokenRangeSplitter.java index 169600eca405..251e6b96ec56 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ 
b/src/java/org/apache/cassandra/repair/unifiedrepair/IUnifiedRepairTokenRangeSplitter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.List; @@ -24,22 +24,22 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -public interface IAutoRepairTokenRangeSplitter +public interface IUnifiedRepairTokenRangeSplitter { /** * Split the token range you wish to repair into multiple assignments. - * The autorepair framework will repair the list of returned subrange in a sequence. + * The unifiedrepair framework will repair the list of returned subrange in a sequence. * @param repairType The type of repair being executed * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. * @param keyspaceName The keyspace being repaired * @param tableNames The tables to repair * @return repair assignments broken up by range, keyspace and tables. */ - List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); + List getRepairAssignments(UnifiedRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); /** - * Defines a repair assignment to be issued by the autorepair framework. + * Defines a repair assignment to be issued by the unifiedrepair framework. 
*/ class RepairAssignment { diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepair.java similarity index 78% rename from src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java rename to src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepair.java index 1abb71d69cac..02db8951fa64 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepair.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.ArrayList; import java.util.EnumMap; @@ -40,7 +40,7 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter.RepairAssignment; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,35 +53,35 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn; import org.apache.cassandra.utils.concurrent.Future; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; -import static 
org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; -public class AutoRepair +public class UnifiedRepair { - private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); + private static final Logger logger = LoggerFactory.getLogger(UnifiedRepair.class); @VisibleForTesting protected static Supplier timeFunc = Clock.Global::currentTimeMillis; - public static AutoRepair instance = new AutoRepair(); + public static UnifiedRepair instance = new UnifiedRepair(); // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra nodes with tiny amount of data. public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); @VisibleForTesting - protected final Map repairExecutors; + protected final Map repairExecutors; - protected final Map repairRunnableExecutors; + protected final Map repairRunnableExecutors; @VisibleForTesting - protected final Map repairStates; + protected final Map repairStates; @VisibleForTesting protected static Consumer> shuffleFunc = java.util.Collections::shuffle; @@ -89,23 +89,23 @@ public class AutoRepair @VisibleForTesting protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; - protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); + protected final Map tokenRangeSplitters = new EnumMap<>(UnifiedRepairConfig.RepairType.class); private boolean isSetupDone = false; @VisibleForTesting - protected AutoRepair() + protected UnifiedRepair() { - AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); - repairExecutors = new 
EnumMap<>(AutoRepairConfig.RepairType.class); - repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); - repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + UnifiedRepairConfig config = DatabaseDescriptor.getUnifiedRepairConfig(); + repairExecutors = new EnumMap<>(UnifiedRepairConfig.RepairType.class); + repairRunnableExecutors = new EnumMap<>(UnifiedRepairConfig.RepairType.class); + repairStates = new EnumMap<>(UnifiedRepairConfig.RepairType.class); + for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) { - repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); - repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType, Thread.NORM_PRIORITY)); - repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); - tokenRangeSplitters.put(repairType, FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); + repairExecutors.put(repairType, executorFactory().scheduled(false, "UnifiedRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "UnifiedRepair-RepairRunnable-" + repairType, Thread.NORM_PRIORITY)); + repairStates.put(repairType, UnifiedRepairConfig.RepairType.getUnifiedRepairState(repairType)); + tokenRangeSplitters.put(repairType, FBUtilities.newUnifiedRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); } } @@ -119,13 +119,13 @@ public void setup() { return; } - AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); - AutoRepairUtils.setup(); + UnifiedRepairConfig config = DatabaseDescriptor.getUnifiedRepairConfig(); + UnifiedRepairUtils.setup(); - for (AutoRepairConfig.RepairType repairType : 
AutoRepairConfig.RepairType.values()) + for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) { - if (config.isAutoRepairEnabled(repairType)) - AutoRepairService.instance.checkCanRun(repairType); + if (config.isUnifiedRepairEnabled(repairType)) + UnifiedRepairService.instance.checkCanRun(repairType); repairExecutors.get(repairType).scheduleWithFixedDelay( () -> repair(repairType), @@ -138,26 +138,26 @@ public void setup() } // repairAsync runs a repair session of the given type asynchronously. - public void repairAsync(AutoRepairConfig.RepairType repairType) + public void repairAsync(UnifiedRepairConfig.RepairType repairType) { - if (!AutoRepairService.instance.getAutoRepairConfig().isAutoRepairEnabled(repairType)) + if (!UnifiedRepairService.instance.getUnifiedRepairConfig().isUnifiedRepairEnabled(repairType)) { - throw new ConfigurationException("Auto-repair is disabled for repair type " + repairType); + throw new ConfigurationException("Unified-repair is disabled for repair type " + repairType); } repairExecutors.get(repairType).submit(() -> repair(repairType)); } // repair runs a repair session of the given type synchronously. 
- public void repair(AutoRepairConfig.RepairType repairType) + public void repair(UnifiedRepairConfig.RepairType repairType) { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - if (!config.isAutoRepairEnabled(repairType)) + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + if (!config.isUnifiedRepairEnabled(repairType)) { - logger.debug("Auto-repair is disabled for repair type {}", repairType); + logger.debug("Unified-repair is disabled for repair type {}", repairType); return; } - AutoRepairService.instance.checkCanRun(repairType); - AutoRepairState repairState = repairStates.get(repairType); + UnifiedRepairService.instance.checkCanRun(repairType); + UnifiedRepairState repairState = repairStates.get(repairType); try { String localDC = DatabaseDescriptor.getLocalDataCenter(); @@ -168,16 +168,16 @@ public void repair(AutoRepairConfig.RepairType repairType) } // refresh the longest unrepaired node - repairState.setLongestUnrepairedNode(AutoRepairUtils.getHostWithLongestUnrepairTime(repairType)); + repairState.setLongestUnrepairedNode(UnifiedRepairUtils.getHostWithLongestUnrepairTime(repairType)); //consistency level to use for local query UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, myId); + RepairTurn turn = UnifiedRepairUtils.myTurnToRunRepair(repairType, myId); if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == MY_TURN_FORCE_REPAIR) { repairState.recordTurn(turn); - // For normal auto repair, we will use primary range only repairs (Repair with -pr option). - // For some cases, we may set the auto_repair_primary_token_range_only flag to false then we will do repair + // For normal unified repair, we will use primary range only repairs (Repair with -pr option). 
+ // For some cases, we may set the unified_repair_primary_token_range_only flag to false then we will do repair // without -pr. We may also do force repair for certain node that we want to repair all the data on one node // When doing force repair, we want to repair without -pr. boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) @@ -190,7 +190,7 @@ public void repair(AutoRepairConfig.RepairType repairType) long startTime = timeFunc.get(); logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, config.getRepairPrimaryTokenRangeOnly(repairType)); - AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); + UnifiedRepairUtils.updateStartUnifiedRepairHistory(repairType, myId, timeFunc.get(), turn); repairState.setRepairKeyspaceCount(0); repairState.setRepairInProgress(true); @@ -201,13 +201,13 @@ public void repair(AutoRepairConfig.RepairType repairType) List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); - // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair + // Unified-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. 
shuffleFunc.accept(keyspaces); for (Keyspace keyspace : keyspaces) { - if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) + if (!UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) { continue; } @@ -237,20 +237,20 @@ public void repair(AutoRepairConfig.RepairType repairType) tableStartTime = timeFunc.get(); } previousAssignment = curRepairAssignment; - if (!config.isAutoRepairEnabled(repairType)) + if (!config.isUnifiedRepairEnabled(repairType)) { - logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); + logger.error("Unified-repair for type {} is disabled hence not running repair", repairType); repairState.setRepairInProgress(false); return; } - if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, tablesToBeRepairedList.size())) + if (UnifiedRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, tablesToBeRepairedList.size())) { collectectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; logger.info("Keyspace took too much time to repair hence skipping it {}", keyspaceName); break; } - if (repairOneTableAtATime && AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) + if (repairOneTableAtATime && UnifiedRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) { collectectedRepairStats.skippedTokenRanges += 1; logger.info("Table took too much time to repair hence skipping it table name {}.{}, token range {}", @@ -339,17 +339,17 @@ else if (retryCount < config.getRepairMaxRetries()) } catch (Exception e) { - logger.error("Exception in autorepair:", e); + logger.error("Exception in unifiedrepair:", e); } } - private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config, UUID myId) + private boolean tooSoonToRunRepair(UnifiedRepairConfig.RepairType repairType, UnifiedRepairState repairState, UnifiedRepairConfig config, UUID myId) { 
if (repairState.getLastRepairTime() == 0) { // the node has either just boooted or has not run repair before, // we should check for the node's repair history in the DB - repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType, myId)); + repairState.setLastRepairTime(UnifiedRepairUtils.getLastRepairTimeForNode(repairType, myId)); } /** check if it is too soon to run repair. one of the reason we * should not run frequent repair is that repair triggers @@ -365,7 +365,7 @@ private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoR return false; } - private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig config, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, CollectectedRepairStats collectectedRepairStats) + private List retrieveTablesToBeRepaired(Keyspace keyspace, UnifiedRepairConfig config, UnifiedRepairConfig.RepairType repairType, UnifiedRepairState repairState, CollectectedRepairStats collectectedRepairStats) { Tables tables = keyspace.getMetadata().tables; List tablesToBeRepaired = new ArrayList<>(); @@ -377,7 +377,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon String tableName = tableMetadata.name; ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); - if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) + if (!columnFamilyStore.metadata().params.unifiedRepair.get(repairType).repairEnabled()) { logger.info("Repair is disabled for keyspace {} for tables: {}", keyspace.getName(), tableName); repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); @@ -385,7 +385,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon continue; } - // this is done to make autorepair safe as running repair on table with more sstables + // this is done to make unifiedrepair safe as running repair on table with more sstables // may have 
its own challenges int totalSSTables = columnFamilyStore.getLiveSSTables().size(); if (totalSSTables > config.getRepairSSTableCountHigherThreshold(repairType)) @@ -399,7 +399,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon tablesToBeRepaired.add(tableName); // See if we should repair MVs as well that are associated with this given table - List mvs = AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); + List mvs = UnifiedRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); if (!mvs.isEmpty()) { tablesToBeRepaired.addAll(mvs); @@ -409,14 +409,14 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon return tablesToBeRepaired; } - private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, + private void cleanupAndUpdateStats(RepairTurn turn, UnifiedRepairConfig.RepairType repairType, UnifiedRepairState repairState, UUID myId, long startTime, CollectectedRepairStats collectectedRepairStats) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) { logger.info("Remove current host from priority list"); - AutoRepairUtils.removePriorityStatus(repairType, myId); + UnifiedRepairUtils.removePriorityStatus(repairType, myId); } repairState.setFailedTokenRangesCount(collectectedRepairStats.failedTokenRanges); @@ -446,10 +446,10 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType Thread.sleep(SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds()); } repairState.setRepairInProgress(false); - AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); + UnifiedRepairUtils.updateFinishUnifiedRepairHistory(repairType, myId, timeFunc.get()); } - public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) + public UnifiedRepairState getRepairState(UnifiedRepairConfig.RepairType repairType) { return 
repairStates.get(repairType); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfig.java similarity index 91% rename from src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java rename to src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfig.java index a80a875125a9..33f37b8f57ee 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfig.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.io.Serializable; import java.util.ArrayList; @@ -34,9 +34,9 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.ParameterizedClass; -public class AutoRepairConfig implements Serializable +public class UnifiedRepairConfig implements Serializable { - // enable/disable auto repair globally, overrides all other settings. Cannot be modified dynamically. + // enable/disable unified repair globally, overrides all other settings. Cannot be modified dynamically. // if it is set to false, then no repair will be scheduled, including full and incremental repairs by this framework. // if it is set to true, then this repair scheduler will consult another config available for each RepairType, and based on that config, it will schedule repairs. 
public volatile Boolean enabled; @@ -61,7 +61,7 @@ public enum RepairType implements Serializable full, incremental; - public static AutoRepairState getAutoRepairState(RepairType repairType) + public static UnifiedRepairState getUnifiedRepairState(RepairType repairType) { switch (repairType) { @@ -78,12 +78,12 @@ public static AutoRepairState getAutoRepairState(RepairType repairType) // repair_type_overrides overrides the global_settings for a specific repair type public volatile Map repair_type_overrides = new EnumMap<>(RepairType.class); - public AutoRepairConfig() + public UnifiedRepairConfig() { this(false); } - public AutoRepairConfig(boolean enabled) + public UnifiedRepairConfig(boolean enabled) { this.enabled = enabled; global_settings = Options.getDefaultOptions(); @@ -98,12 +98,12 @@ public DurationSpec.IntSecondsBound getRepairCheckInterval() return repair_check_interval; } - public boolean isAutoRepairSchedulingEnabled() + public boolean isUnifiedRepairSchedulingEnabled() { return enabled; } - public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferInterval() + public DurationSpec.IntSecondsBound getUnifiedRepairHistoryClearDeleteHostsBufferInterval() { return history_clear_delete_hosts_buffer_interval; } @@ -111,10 +111,10 @@ public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferIn public void startScheduler() { enabled = true; - AutoRepair.instance.setup(); + UnifiedRepair.instance.setup(); } - public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) + public void setUnifiedRepairHistoryClearDeleteHostsBufferInterval(String duration) { history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); } @@ -139,12 +139,12 @@ public void setRepairRetryBackoff(String interval) repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); } - public boolean isAutoRepairEnabled(RepairType repairType) + public boolean isUnifiedRepairEnabled(RepairType 
repairType) { return enabled && applyOverrides(repairType, opt -> opt.enabled); } - public void setAutoRepairEnabled(RepairType repairType, boolean enabled) + public void setUnifiedRepairEnabled(RepairType repairType, boolean enabled) { ensureOverrides(repairType); repair_type_overrides.get(repairType).enabled = enabled; @@ -205,15 +205,15 @@ public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssta repair_type_overrides.get(repairType).sstable_upper_threshold = sstableHigherThreshold; } - public DurationSpec.IntSecondsBound getAutoRepairTableMaxRepairTime(RepairType repairType) + public DurationSpec.IntSecondsBound getUnifiedRepairTableMaxRepairTime(RepairType repairType) { return applyOverrides(repairType, opt -> opt.table_max_repair_time); } - public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) + public void setUnifiedRepairTableMaxRepairTime(RepairType repairType, String unifiedRepairTableMaxRepairTime) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(autoRepairTableMaxRepairTime); + repair_type_overrides.get(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(unifiedRepairTableMaxRepairTime); } public Set getIgnoreDCs(RepairType repairType) @@ -309,11 +309,11 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT repair_type_overrides.get(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); } - // Options configures auto-repair behavior for a given repair type. + // Options configures unified-repair behavior for a given repair type. // All fields can be modified dynamically. 
public static class Options implements Serializable { - // defaultOptions defines the default auto-repair behavior when no overrides are defined + // defaultOptions defines the default unified-repair behavior when no overrides are defined @VisibleForTesting protected static final Options defaultOptions = getDefaultOptions(); @@ -339,16 +339,16 @@ protected static Options getDefaultOptions() opts.force_repair_new_node = false; opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = false; - opts.token_range_splitter = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); + opts.token_range_splitter = new ParameterizedClass(DefaultUnifiedRepairTokenSplitter.class.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours return opts; } - // enable/disable auto repair for the given repair type + // enable/disable unified repair for the given repair type public volatile Boolean enabled; - // auto repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. + // unified repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. public volatile Boolean repair_by_keyspace; // the number of subranges to split each to-be-repaired token range into, // the higher this number, the smaller the repair sessions will be @@ -387,7 +387,7 @@ protected static Options getDefaultOptions() // specifies a denylist of datacenters to repair // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. 
public volatile Set ignore_dcs; - // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' + // Set this 'true' if UnifiedRepair should repair only the primary ranges owned by this node; else, 'false' // It is the same as -pr in nodetool repair options. public volatile Boolean repair_primary_token_range_only; // configures whether to force immediate repair on new nodes @@ -401,9 +401,9 @@ protected static Options getDefaultOptions() // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. public volatile DurationSpec.IntSecondsBound table_max_repair_time; // the default is 'true'. - // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. + // This flag determines whether the unified-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. public volatile Boolean mv_repair_enabled; - // the default is DefaultAutoRepairTokenSplitter. The class should implement IAutoRepairTokenRangeSplitter. + // the default is DefaultUnifiedRepairTokenSplitter. The class should implement IUnifiedRepairTokenRangeSplitter. 
// The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' public volatile ParameterizedClass token_range_splitter; // the minimum delay after a node starts before the scheduler starts running repair diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairState.java similarity index 86% rename from src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java rename to src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairState.java index b2a69c31a44c..1a95bcac88ad 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairState.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import com.google.common.annotations.VisibleForTesting; @@ -26,14 +26,14 @@ import org.apache.cassandra.db.view.TableViews; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.metrics.AutoRepairMetricsManager; -import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.UnifiedRepairMetricsManager; +import org.apache.cassandra.metrics.UnifiedRepairMetrics; import org.apache.cassandra.repair.RepairCoordinator; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.UnifiedRepairHistory; import org.apache.cassandra.repair.RepairParallelism; import org.apache.cassandra.repair.messages.RepairOption; -import org.apache.cassandra.service.AutoRepairService; +import 
org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.Clock; @@ -55,10 +55,10 @@ import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; -// AutoRepairState represents the state of automated repair for a given repair type. -public abstract class AutoRepairState implements ProgressListener +// UnifiedRepairState represents the state of unified repair for a given repair type. +public abstract class UnifiedRepairState implements ProgressListener { - protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); + protected static final Logger logger = LoggerFactory.getLogger(UnifiedRepairState.class); private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @VisibleForTesting protected static Supplier timeFunc = Clock.Global::currentTimeMillis; @@ -93,16 +93,16 @@ public abstract class AutoRepairState implements ProgressListener protected int skippedTablesCount = 0; @VisibleForTesting - protected AutoRepairHistory longestUnrepairedNode; + protected UnifiedRepairHistory longestUnrepairedNode; @VisibleForTesting protected Condition condition = newOneTimeCondition(); @VisibleForTesting protected boolean success = true; - protected final AutoRepairMetrics metrics; + protected final UnifiedRepairMetrics metrics; - protected AutoRepairState(RepairType repairType) + protected UnifiedRepairState(RepairType repairType) { - metrics = AutoRepairMetricsManager.getMetrics(repairType); + metrics = UnifiedRepairMetricsManager.getMetrics(repairType); this.repairType = repairType; } @@ -229,7 +229,7 @@ public int getRepairKeyspaceCount() return repairKeyspaceCount; } - public void setLongestUnrepairedNode(AutoRepairHistory longestUnrepairedNode) + public void 
setLongestUnrepairedNode(UnifiedRepairHistory longestUnrepairedNode) { this.longestUnrepairedNode = longestUnrepairedNode; } @@ -279,7 +279,7 @@ public boolean isSuccess() return success; } - public void recordTurn(AutoRepairUtils.RepairTurn turn) + public void recordTurn(UnifiedRepairUtils.RepairTurn turn) { metrics.recordTurn(turn); } @@ -300,7 +300,7 @@ public void resetWaitCondition() } } -class IncrementalRepairState extends AutoRepairState +class IncrementalRepairState extends UnifiedRepairState { public IncrementalRepairState() { @@ -311,7 +311,8 @@ public IncrementalRepairState() public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, true, false, - AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); + UnifiedRepairService.instance.getUnifiedRepairConfig().getRepairThreads(repairType), ranges, + false, false, PreviewKind.NONE, true, true, false, false, false, false); option.getColumnFamilies().addAll(filterOutUnsafeTables(keyspace, tables)); @@ -344,7 +345,7 @@ protected List filterOutUnsafeTables(String keyspaceName, List t } } -class FullRepairState extends AutoRepairState +class FullRepairState extends UnifiedRepairState { public FullRepairState() { @@ -355,7 +356,9 @@ public FullRepairState() public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, - AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); + UnifiedRepairService.instance.getUnifiedRepairConfig().getRepairThreads(repairType), ranges, + false, false, PreviewKind.NONE, true, true, false, 
false, false, false); + option.getColumnFamilies().addAll(tables); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtils.java similarity index 83% rename from src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java rename to src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtils.java index 0a328de44f98..ed7af2963515 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtils.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.ArrayList; import java.util.Arrays; @@ -61,7 +61,7 @@ import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.serializers.SetSerializer; import org.apache.cassandra.serializers.UUIDSerializer; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageService; @@ -73,21 +73,21 @@ import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; -import static 
org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; /** - * This class serves as a utility class for AutoRepair. It contains various helper APIs + * This class serves as a utility class for UnifiedRepair. It contains various helper APIs * to store/retrieve repair status, decide whose turn is next, etc. */ -public class AutoRepairUtils +public class UnifiedRepairUtils { - private static final Logger logger = LoggerFactory.getLogger(AutoRepairUtils.class); + private static final Logger logger = LoggerFactory.getLogger(UnifiedRepairUtils.class); static final String COL_REPAIR_TYPE = "repair_type"; static final String COL_HOST_ID = "host_id"; static final String COL_REPAIR_START_TS = "repair_start_ts"; @@ -100,56 +100,56 @@ public class AutoRepairUtils final static String SELECT_REPAIR_HISTORY = String.format( "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE); final static String SELECT_REPAIR_PRIORITY = String.format( "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String DEL_REPAIR_PRIORITY = String.format( "DELETE %s[?] 
FROM %s.%s WHERE %s = ?", COL_REPAIR_PRIORITY, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String ADD_PRIORITY_HOST = String.format( "UPDATE %s.%s SET %s = %s + ? WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String INSERT_NEW_REPAIR_HISTORY = String.format( "INSERT INTO %s.%s (%s, %s, %s, %s, %s, %s) values (?, ? ,?, ?, {}, ?) IF NOT EXISTS", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID, COL_REPAIR_START_TS, COL_REPAIR_FINISH_TS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME); final static String ADD_HOST_ID_TO_DELETE_HOSTS = String.format( "UPDATE %s.%s SET %s = %s + ?, %s = ? WHERE %s = ? AND %s = ? IF EXISTS" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_DELETE_HOSTS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME, COL_REPAIR_TYPE, COL_HOST_ID); - final static String DEL_AUTO_REPAIR_HISTORY = String.format( + final static String DEL_UNIFIED_REPAIR_HISTORY = String.format( "DELETE FROM %s.%s WHERE %s = ? AND %s = ?" 
- , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_START_REPAIR_HISTORY = String.format( "UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_START_TS, COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_FINISH_REPAIR_HISTORY = String.format( "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); final static String CLEAR_DELETE_HOSTS = String.format( "UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_DELETE_HOSTS, COL_REPAIR_TYPE, COL_HOST_ID); final static String SET_FORCE_REPAIR = String.format( "UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); final static String SELECT_LAST_REPAIR_TIME_FOR_NODE = String.format( "SELECT %s FROM %s.%s WHERE %s = ? 
AND %s = ?", COL_REPAIR_FINISH_TS, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); + SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); static ModificationStatement delStatementRepairHistory; static SelectStatement selectStatementRepairHistory; @@ -199,14 +199,14 @@ public static void setup() .forInternalCalls()); clearDeleteHostsStatement = (ModificationStatement) QueryProcessor.getStatement(CLEAR_DELETE_HOSTS, ClientState .forInternalCalls()); - delStatementRepairHistory = (ModificationStatement) QueryProcessor.getStatement(DEL_AUTO_REPAIR_HISTORY, ClientState + delStatementRepairHistory = (ModificationStatement) QueryProcessor.getStatement(DEL_UNIFIED_REPAIR_HISTORY, ClientState .forInternalCalls()); - Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); - internalQueryCL = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + Keyspace unifiedRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + internalQueryCL = unifiedRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? 
ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.ONE; } - public static class AutoRepairHistory + public static class UnifiedRepairHistory { UUID hostId; String repairTurn; @@ -216,8 +216,8 @@ public static class AutoRepairHistory long deleteHostsUpdateTime; boolean forceRepair; - public AutoRepairHistory(UUID hostId, String repairTurn, long lastRepairStartTime, long lastRepairFinishTime, - Set deleteHosts, long deleteHostsUpateTime, boolean forceRepair) + public UnifiedRepairHistory(UUID hostId, String repairTurn, long lastRepairStartTime, long lastRepairFinishTime, + Set deleteHosts, long deleteHostsUpateTime, boolean forceRepair) { this.hostId = hostId; this.repairTurn = repairTurn; @@ -260,15 +260,15 @@ public static class CurrentRepairStatus public Set hostIdsWithOnGoingRepair; // hosts that is running repair public Set hostIdsWithOnGoingForceRepair; // hosts that is running repair because of force repair Set priority; - List historiesWithoutOnGoingRepair; // hosts that is NOT running repair + List historiesWithoutOnGoingRepair; // hosts that is NOT running repair - public CurrentRepairStatus(List repairHistories, Set priority) + public CurrentRepairStatus(List repairHistories, Set priority) { hostIdsWithOnGoingRepair = new HashSet<>(); hostIdsWithOnGoingForceRepair = new HashSet<>(); historiesWithoutOnGoingRepair = new ArrayList<>(); - for (AutoRepairHistory history : repairHistories) + for (UnifiedRepairHistory history : repairHistories) { if (history.isRepairRunning()) { @@ -301,7 +301,7 @@ public String toString() } @VisibleForTesting - public static List getAutoRepairHistory(RepairType repairType) + public static List getUnifiedRepairHistory(RepairType repairType) { UntypedResultSet repairHistoryResult; @@ -309,7 +309,7 @@ public static List getAutoRepairHistory(RepairType repairType QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()))), Dispatcher.RequestTime.forImmediateExecution()); 
repairHistoryResult = UntypedResultSet.create(repairStatusRows.result); - List repairHistories = new ArrayList<>(); + List repairHistories = new ArrayList<>(); if (repairHistoryResult.size() > 0) { for (UntypedResultSet.Row row : repairHistoryResult) @@ -323,8 +323,8 @@ public static List getAutoRepairHistory(RepairType repairType Set deleteHosts = row.getSet(COL_DELETE_HOSTS, UUIDType.instance); long deleteHostsUpdateTime = row.getLong(COL_DELETE_HOSTS_UPDATE_TIME, 0); Boolean forceRepair = row.has(COL_FORCE_REPAIR) ? row.getBoolean(COL_FORCE_REPAIR) : false; - repairHistories.add(new AutoRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, - deleteHosts, deleteHostsUpdateTime, forceRepair)); + repairHistories.add(new UnifiedRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, + deleteHosts, deleteHostsUpdateTime, forceRepair)); } return repairHistories; } @@ -374,8 +374,8 @@ public static void setForceRepair(RepairType repairType, UUID hostId) public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) { - List autoRepairHistories = getAutoRepairHistory(repairType); - return getCurrentRepairStatus(repairType, autoRepairHistories); + List unifiedRepairHistories = getUnifiedRepairHistory(repairType); + return getCurrentRepairStatus(repairType, unifiedRepairHistories); } public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) @@ -397,11 +397,11 @@ public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) return repairTime.one().getLong(COL_REPAIR_FINISH_TS); } - public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories) + public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List unifiedRepairHistories) { - if (autoRepairHistories != null) + if (unifiedRepairHistories != null) { - CurrentRepairStatus status = new CurrentRepairStatus(autoRepairHistories, 
getPriorityHostIds(repairType)); + CurrentRepairStatus status = new CurrentRepairStatus(unifiedRepairHistories, getPriorityHostIds(repairType)); return status; } @@ -415,7 +415,7 @@ protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Co for (NodeAddresses node : allNodesInRing) { String nodeDC = DatabaseDescriptor.getLocator().location(node.broadcastAddress).datacenter; - if (AutoRepairService.instance.getAutoRepairConfig().getIgnoreDCs(repairType).contains(nodeDC)) + if (UnifiedRepairService.instance.getUnifiedRepairConfig().getIgnoreDCs(repairType).contains(nodeDC)) { logger.info("Ignore node {} because its datacenter is {}", node, nodeDC); continue; @@ -443,39 +443,39 @@ public static TreeSet getHostIdsInCurrentRing(RepairType repairType) } // This function will return the host ID for the node which has not been repaired for longest time - public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) + public static UnifiedRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) { - List autoRepairHistories = getAutoRepairHistory(repairType); - return getHostWithLongestUnrepairTime(autoRepairHistories); + List unifiedRepairHistories = getUnifiedRepairHistory(repairType); + return getHostWithLongestUnrepairTime(unifiedRepairHistories); } - private static AutoRepairHistory getHostWithLongestUnrepairTime(List autoRepairHistories) + private static UnifiedRepairHistory getHostWithLongestUnrepairTime(List unifiedRepairHistories) { - if (autoRepairHistories == null) + if (unifiedRepairHistories == null) { return null; } - AutoRepairHistory rst = null; + UnifiedRepairHistory rst = null; long oldestTimestamp = Long.MAX_VALUE; - for (AutoRepairHistory autoRepairHistory : autoRepairHistories) + for (UnifiedRepairHistory unifiedRepairHistory : unifiedRepairHistories) { - if (autoRepairHistory.lastRepairFinishTime < oldestTimestamp) + if (unifiedRepairHistory.lastRepairFinishTime < oldestTimestamp) { - rst = 
autoRepairHistory; - oldestTimestamp = autoRepairHistory.lastRepairFinishTime; + rst = unifiedRepairHistory; + oldestTimestamp = unifiedRepairHistory.lastRepairFinishTime; } } return rst; } - public static int getMaxNumberOfNodeRunAutoRepair(RepairType repairType, int groupSize) + public static int getMaxNumberOfNodeRunUnifiedRepair(RepairType repairType, int groupSize) { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); if (groupSize == 0) { return Math.max(config.getParallelRepairCount(repairType), 1); } - // we will use the max number from config between auto_repair_parallel_repair_count_in_group and auto_repair_parallel_repair_percentage_in_group + // we will use the max number from config between unified_repair_parallel_repair_count_in_group and unified_repair_parallel_repair_percentage_in_group int value = Math.max(groupSize * config.getParallelRepairPercentage(repairType) / 100, config.getParallelRepairCount(repairType)); // make sure at least one node getting repaired @@ -492,33 +492,33 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) TreeSet hostIdsInCurrentRing = getHostIdsInCurrentRing(repairType, allNodesInRing); logger.info("Total nodes qualified for repair {}", hostIdsInCurrentRing.size()); - List autoRepairHistories = getAutoRepairHistory(repairType); - Set autoRepairHistoryIds = new HashSet<>(); + List unifiedRepairHistories = getUnifiedRepairHistory(repairType); + Set unifiedRepairHistoryIds = new HashSet<>(); // 1. 
Remove any node that is not part of group based on goissip info - if (autoRepairHistories != null) + if (unifiedRepairHistories != null) { - for (AutoRepairHistory nodeHistory : autoRepairHistories) + for (UnifiedRepairHistory nodeHistory : unifiedRepairHistories) { - autoRepairHistoryIds.add(nodeHistory.hostId); + unifiedRepairHistoryIds.add(nodeHistory.hostId); // clear delete_hosts if the node's delete hosts is not growing for more than two hours - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); if (nodeHistory.deleteHosts.size() > 0 - && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( + && config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( currentTimeMillis() - nodeHistory.deleteHostsUpdateTime )) { clearDeleteHosts(repairType, nodeHistory.hostId); logger.info("Delete hosts for {} for repair type {} has not been updated for more than {} seconds. Delete hosts has been cleared. 
Delete hosts before clear {}" - , nodeHistory.hostId, repairType, config.getAutoRepairHistoryClearDeleteHostsBufferInterval(), nodeHistory.deleteHosts); + , nodeHistory.hostId, repairType, config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval(), nodeHistory.deleteHosts); } else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) { if (nodeHistory.deleteHosts.size() > Math.max(2, hostIdsInCurrentRing.size() * 0.5)) { // More than half of the groups thinks the record should be deleted - logger.info("{} think {} is orphan node, will delete auto repair history for repair type {}.", nodeHistory.deleteHosts, nodeHistory.hostId, repairType); - deleteAutoRepairHistory(repairType, nodeHistory.hostId); + logger.info("{} think {} is orphan node, will delete unified repair history for repair type {}.", nodeHistory.deleteHosts, nodeHistory.hostId, repairType); + deleteUnifiedRepairHistory(repairType, nodeHistory.hostId); } else { @@ -530,23 +530,23 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - // 2. Add node to auto repair history table if a node is in gossip info + // 2. 
Add node to unified repair history table if a node is in gossip info for (UUID hostId : hostIdsInCurrentRing) { - if (!autoRepairHistoryIds.contains(hostId)) + if (!unifiedRepairHistoryIds.contains(hostId)) { - logger.info("{} for repair type {} doesn't exist in the auto repair history table, insert a new record.", repairType, hostId); + logger.info("{} for repair type {} doesn't exist in the unified repair history table, insert a new record.", repairType, hostId); insertNewRepairHistory(repairType, hostId, currentTimeMillis(), currentTimeMillis()); } } //get current repair status - CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); + CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, unifiedRepairHistories); if (currentRepairStatus != null) { logger.info("Latest repair status {}", currentRepairStatus); //check if I am forced to run repair - for (AutoRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) + for (UnifiedRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) { if (history.forceRepair && history.hostId.equals(myId)) { @@ -555,24 +555,24 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, - autoRepairHistories == null ? 0 : autoRepairHistories.size()); + int parallelRepairNumber = getMaxNumberOfNodeRunUnifiedRepair(repairType, + unifiedRepairHistories == null ? 
0 : unifiedRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); if (currentRepairStatus == null || parallelRepairNumber > currentRepairStatus.hostIdsWithOnGoingRepair.size()) { // more repairs can be run, I might be the new one - if (autoRepairHistories != null) + if (unifiedRepairHistories != null) { - logger.info("Auto repair history table has {} records", autoRepairHistories.size()); + logger.info("Unified repair history table has {} records", unifiedRepairHistories.size()); } else { // try to fetch again - autoRepairHistories = getAutoRepairHistory(repairType); - currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); - if (autoRepairHistories == null || currentRepairStatus == null) + unifiedRepairHistories = getUnifiedRepairHistory(repairType); + currentRepairStatus = getCurrentRepairStatus(repairType, unifiedRepairHistories); + if (unifiedRepairHistories == null || currentRepairStatus == null) { logger.error("No record found"); return NOT_MY_TURN; @@ -612,7 +612,7 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } // get the longest unrepaired node from the nodes which are not running repair - AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + UnifiedRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); //check who is next, which is helpful for debugging logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); if (defaultNodeToBeRepaired != null && defaultNodeToBeRepaired.hostId.equals(myId)) @@ -634,7 +634,7 @@ else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) return NOT_MY_TURN; } - static void deleteAutoRepairHistory(RepairType repairType, UUID hostId) + static void deleteUnifiedRepairHistory(RepairType repairType, UUID hostId) { //delete the 
given hostId delStatementRepairHistory.execute(QueryState.forInternalCalls(), @@ -643,7 +643,7 @@ static void deleteAutoRepairHistory(RepairType repairType, UUID hostId) ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); } - static void updateStartAutoRepairHistory(RepairType repairType, UUID myId, long timestamp, RepairTurn turn) + static void updateStartUnifiedRepairHistory(RepairType repairType, UUID myId, long timestamp, RepairTurn turn) { recordStartRepairHistoryStatement.execute(QueryState.forInternalCalls(), QueryOptions.forInternalCalls(internalQueryCL, @@ -654,7 +654,7 @@ static void updateStartAutoRepairHistory(RepairType repairType, UUID myId, long )), Dispatcher.RequestTime.forImmediateExecution()); } - static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long timestamp) + static void updateFinishUnifiedRepairHistory(RepairType repairType, UUID myId, long timestamp) { recordFinishRepairHistoryStatement.execute(QueryState.forInternalCalls(), QueryOptions.forInternalCalls(internalQueryCL, @@ -663,15 +663,15 @@ static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long ByteBufferUtil.bytes(myId) )), Dispatcher.RequestTime.forImmediateExecution()); // Do not remove beblow log, the log is used by dtest - logger.info("Auto repair finished for {}", myId); + logger.info("Unified repair finished for {}", myId); } public static void insertNewRepairHistory(RepairType repairType, UUID hostId, long startTime, long finishTime) { try { - Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); - ConsistencyLevel cl = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + Keyspace unifiedRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + ConsistencyLevel cl = unifiedRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? 
ConsistencyLevel.LOCAL_SERIAL : null; UntypedResultSet resultSet; @@ -688,7 +688,7 @@ public static void insertNewRepairHistory(RepairType repairType, UUID hostId, lo boolean applied = resultSet.one().getBoolean(ModificationStatement.CAS_RESULT_COLUMN.toString()); if (applied) { - logger.info("Successfully inserted a new auto repair history record for host id: {}", hostId); + logger.info("Successfully inserted a new unified repair history record for host id: {}", hostId); } else { @@ -811,21 +811,21 @@ public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long sta { long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds (currentTimeMillis() - startTime); - return AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() < + return UnifiedRepairService.instance.getUnifiedRepairConfig().getUnifiedRepairTableMaxRepairTime(repairType).toSeconds() < tableRepairTimeSoFar; } public static boolean keyspaceMaxRepairTimeExceeded(RepairType repairType, long startTime, int numOfTablesToBeRepaired) { long keyspaceRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds((currentTimeMillis() - startTime)); - return (long) AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() * + return (long) UnifiedRepairService.instance.getUnifiedRepairConfig().getUnifiedRepairTableMaxRepairTime(repairType).toSeconds() * numOfTablesToBeRepaired < keyspaceRepairTimeSoFar; } public static List getAllMVs(RepairType repairType, Keyspace keyspace, TableMetadata tableMetadata) { List allMvs = new ArrayList<>(); - if (AutoRepairService.instance.getAutoRepairConfig().getMVRepairEnabled(repairType) && keyspace.getMetadata().views != null) + if (UnifiedRepairService.instance.getUnifiedRepairConfig().getMVRepairEnabled(repairType) && keyspace.getMetadata().views != null) { Iterator views = keyspace.getMetadata().views.forTable(tableMetadata.id).iterator(); while (views.hasNext()) @@ -840,12 
+840,12 @@ public static List getAllMVs(RepairType repairType, Keyspace keyspace, T public static void runRepairOnNewlyBootstrappedNodeIfEnabled() { - AutoRepairConfig repairConfig = DatabaseDescriptor.getAutoRepairConfig(); - if (repairConfig.isAutoRepairSchedulingEnabled()) + UnifiedRepairConfig repairConfig = DatabaseDescriptor.getUnifiedRepairConfig(); + if (repairConfig.isUnifiedRepairSchedulingEnabled()) { - for (AutoRepairConfig.RepairType rType : AutoRepairConfig.RepairType.values()) - if (repairConfig.isAutoRepairEnabled(rType) && repairConfig.getForceRepairNewNode(rType)) - AutoRepairUtils.setForceRepairNewNode(rType); + for (UnifiedRepairConfig.RepairType rType : UnifiedRepairConfig.RepairType.values()) + if (repairConfig.isUnifiedRepairEnabled(rType) && repairConfig.getForceRepairNewNode(rType)) + UnifiedRepairUtils.setForceRepairNewNode(rType); } } diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index ffeb4580eba5..e1dc51b8038f 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -45,7 +45,7 @@ import org.apache.cassandra.db.rows.*; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.schema.ColumnMetadata.ClusteringOrder; import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; @@ -598,8 +598,8 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) .add("extensions", params.extensions) - .add("repair_full", 
params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) - .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + .add("repair_full", params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).asMap()) + .add("repair_incremental", params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).asMap()); // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ @@ -1090,8 +1090,8 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) - .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))); + .unifiedRepairFull(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) + .unifiedRepairIncremental(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index f9c023e33b32..209007868132 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -95,11 +95,11 @@ private SystemDistributedKeyspace() public static final String PARTITION_DENYLIST_TABLE = "partition_denylist"; - public static final String AUTO_REPAIR_HISTORY = "auto_repair_history"; + public static final String UNIFIED_REPAIR_HISTORY = "unified_repair_history"; - public static final String 
AUTO_REPAIR_PRIORITY = "auto_repair_priority"; + public static final String UNIFIED_REPAIR_PRIORITY = "unified_repair_priority"; - public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE, AUTO_REPAIR_HISTORY, AUTO_REPAIR_PRIORITY); + public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE, UNIFIED_REPAIR_HISTORY, UNIFIED_REPAIR_PRIORITY); public static final String REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + "keyspace_name text," @@ -162,27 +162,27 @@ private SystemDistributedKeyspace() private static final TableMetadata PartitionDenylistTable = parse(PARTITION_DENYLIST_TABLE, "Partition keys which have been denied access", PARTITION_DENYLIST_CQL).build(); - public static final String AUTO_REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" - + "host_id uuid," - + "repair_type text," - + "repair_turn text," - + "repair_start_ts timestamp," - + "repair_finish_ts timestamp," - + "delete_hosts set," - + "delete_hosts_update_time timestamp," - + "force_repair boolean," - + "PRIMARY KEY (repair_type, host_id))"; + public static final String UNIFIED_REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "host_id uuid," + + "repair_type text," + + "repair_turn text," + + "repair_start_ts timestamp," + + "repair_finish_ts timestamp," + + "delete_hosts set," + + "delete_hosts_update_time timestamp," + + "force_repair boolean," + + "PRIMARY KEY (repair_type, host_id))"; - private static final TableMetadata AutoRepairHistoryTable = - parse(AUTO_REPAIR_HISTORY, "Auto repair history for each node", AUTO_REPAIR_HISTORY_CQL).build(); + private static final TableMetadata UnifiedRepairHistoryTable = + parse(UNIFIED_REPAIR_HISTORY, "Unified repair history for each node", UNIFIED_REPAIR_HISTORY_CQL).build(); - public static final String AUTO_REPAIR_PRIORITY_CQL = "CREATE TABLE IF NOT EXISTS %s (" - + 
"repair_type text," - + "repair_priority set," - + "PRIMARY KEY (repair_type))"; + public static final String UNIFIED_REPAIR_PRIORITY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "repair_type text," + + "repair_priority set," + + "PRIMARY KEY (repair_type))"; - private static final TableMetadata AutoRepairPriorityTable = - parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", AUTO_REPAIR_PRIORITY_CQL).build(); + private static final TableMetadata UnifiedRepairPriorityTable = + parse(UNIFIED_REPAIR_PRIORITY, "Unified repair priority for each group", UNIFIED_REPAIR_PRIORITY_CQL).build(); private static TableMetadata.Builder parse(String table, String description, String cql) @@ -197,7 +197,7 @@ public static KeyspaceMetadata metadata() { return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(Math.max(DEFAULT_RF, DatabaseDescriptor.getDefaultKeyspaceRF())), - Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, AutoRepairHistoryTable, AutoRepairPriorityTable)); + Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, UnifiedRepairHistoryTable, UnifiedRepairPriorityTable)); } public static void startParentRepair(TimeUUID parent_id, String keyspaceName, String[] cfnames, RepairOption options) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 2c3cfe4daf28..8d432667408e 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -39,7 +39,7 @@ import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import 
org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -137,7 +137,7 @@ public String toString() public final TransactionalMigrationFromMode transactionalMigrationFrom; public final boolean pendingDrop; - public final Map automatedRepair; + public final Map unifiedRepair; private TableParams(Builder builder) { @@ -167,11 +167,11 @@ private TableParams(Builder builder) transactionalMigrationFrom = builder.transactionalMigrationFrom; pendingDrop = builder.pendingDrop; checkNotNull(transactionalMigrationFrom); - automatedRepair = new EnumMap(AutoRepairConfig.RepairType.class) + unifiedRepair = new EnumMap(UnifiedRepairConfig.RepairType.class) { { - put(AutoRepairConfig.RepairType.full, builder.automatedRepairFull); - put(AutoRepairConfig.RepairType.incremental, builder.automatedRepairIncremental); + put(UnifiedRepairConfig.RepairType.full, builder.unifiedRepairFull); + put(UnifiedRepairConfig.RepairType.incremental, builder.unifiedRepairIncremental); } }; } @@ -206,8 +206,8 @@ public static Builder builder(TableParams params) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) .pendingDrop(params.pendingDrop) - .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.full)) - .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.incremental)) + .unifiedRepairFull(params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full)) + .unifiedRepairIncremental(params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental)) ; } @@ -267,7 +267,7 @@ public void validate() if (transactionalMode.isTestMode() && !CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getBoolean()) fail("Transactional mode " + transactionalMode + " can't be used if " + CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getKey() + " is not 
set"); - for (Map.Entry entry : automatedRepair.entrySet()) + for (Map.Entry entry : unifiedRepair.entrySet()) { entry.getValue().validate(); } @@ -312,7 +312,7 @@ public boolean equals(Object o) && transactionalMode == p.transactionalMode && transactionalMigrationFrom == p.transactionalMigrationFrom && pendingDrop == p.pendingDrop - && automatedRepair.equals(p.automatedRepair); + && unifiedRepair.equals(p.unifiedRepair); } @Override @@ -341,7 +341,7 @@ public int hashCode() transactionalMode, transactionalMigrationFrom, pendingDrop, - automatedRepair); + unifiedRepair); } @Override @@ -372,8 +372,8 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) - .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) - .add(Option.REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.incremental)) + .add(Option.REPAIR_FULL.toString(), unifiedRepair.get(UnifiedRepairConfig.RepairType.full)) + .add(Option.REPAIR_INCREMENTAL.toString(), unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental)) .toString(); } @@ -436,9 +436,9 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) .newLine() - .append("AND repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .append("AND repair_full = ").append(unifiedRepair.get(UnifiedRepairConfig.RepairType.full).asMap()) .newLine() - .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + .append("AND repair_incremental = ").append(unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).asMap()); } public static final class Builder @@ -467,8 +467,8 @@ public static final class Builder public 
TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; public boolean pendingDrop = false; - private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.full); - private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.incremental); + private UnifiedRepairParams unifiedRepairFull = new UnifiedRepairParams(UnifiedRepairConfig.RepairType.full); + private UnifiedRepairParams unifiedRepairIncremental = new UnifiedRepairParams(UnifiedRepairConfig.RepairType.incremental); public Builder() { @@ -617,15 +617,15 @@ public Builder pendingDrop(boolean pendingDrop) return this; } - public Builder automatedRepairFull(AutoRepairParams val) + public Builder unifiedRepairFull(UnifiedRepairParams val) { - automatedRepairFull = val; + unifiedRepairFull = val; return this; } - public Builder automatedRepairIncremental(AutoRepairParams val) + public Builder unifiedRepairIncremental(UnifiedRepairParams val) { - automatedRepairIncremental = val; + unifiedRepairIncremental = val; return this; } } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/UnifiedRepairParams.java similarity index 74% rename from src/java/org/apache/cassandra/schema/AutoRepairParams.java rename to src/java/org/apache/cassandra/schema/UnifiedRepairParams.java index ea3802db93d2..27459ad30be0 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/UnifiedRepairParams.java @@ -26,12 +26,12 @@ import org.apache.commons.lang3.StringUtils; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import static java.lang.String.format; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; -public final class 
AutoRepairParams +public final class UnifiedRepairParams { public enum Option { @@ -44,23 +44,23 @@ public String toString() } } - public static final Map> DEFAULT_OPTIONS = - ImmutableMap.of(AutoRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - AutoRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); + public static final Map> DEFAULT_OPTIONS = + ImmutableMap.of(UnifiedRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), + UnifiedRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); - public final AutoRepairConfig.RepairType type; + public final UnifiedRepairConfig.RepairType type; - private Map> options = DEFAULT_OPTIONS; + private Map> options = DEFAULT_OPTIONS; - AutoRepairParams(AutoRepairConfig.RepairType type) + UnifiedRepairParams(UnifiedRepairConfig.RepairType type) { this.type = type; } - public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Map options) + public static UnifiedRepairParams create(UnifiedRepairConfig.RepairType repairType, Map options) { - Map> optionsMap = new HashMap<>(); - for (Map.Entry> entry : DEFAULT_OPTIONS.entrySet()) + Map> optionsMap = new HashMap<>(); + for (Map.Entry> entry : DEFAULT_OPTIONS.entrySet()) { optionsMap.put(entry.getKey(), new HashMap<>(entry.getValue())); } @@ -75,7 +75,7 @@ public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Ma optionsMap.get(repairType).put(entry.getKey(), entry.getValue()); } } - AutoRepairParams repairParams = new AutoRepairParams(repairType); + UnifiedRepairParams repairParams = new UnifiedRepairParams(repairType); repairParams.options = optionsMap; return repairParams; } @@ -109,7 +109,7 @@ public Map options() return options.get(type); } - public static AutoRepairParams fromMap(AutoRepairConfig.RepairType repairType, Map map) + public static 
UnifiedRepairParams fromMap(UnifiedRepairConfig.RepairType repairType, Map map) { return create(repairType, map); } @@ -133,10 +133,10 @@ public boolean equals(Object o) if (this == o) return true; - if (!(o instanceof AutoRepairParams)) + if (!(o instanceof UnifiedRepairParams)) return false; - AutoRepairParams cp = (AutoRepairParams) o; + UnifiedRepairParams cp = (UnifiedRepairParams) o; return options.equals(cp.options); } diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index 69ee2c2c5d67..90ace416c8cf 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -403,7 +403,7 @@ protected void setup() AuditLogManager.instance.initialize(); - StorageService.instance.doAutoRepairSetup(); + StorageService.instance.doUnifiedRepairSetup(); // schedule periodic background compaction task submission. this is simply a backstop against compactions stalling // due to scheduling errors or race conditions diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index a635b8f7b7b6..afa1202137ef 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -73,7 +73,7 @@ import com.google.common.util.concurrent.Uninterruptibles; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepair; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1131,15 +1131,15 @@ public void doAuthSetup(boolean async) } } - public void doAutoRepairSetup() + public void doUnifiedRepairSetup() { - AutoRepairService.setup(); - if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + 
UnifiedRepairService.setup(); + if (DatabaseDescriptor.getUnifiedRepairConfig().isUnifiedRepairSchedulingEnabled()) { - logger.info("Enable auto-repair scheduling"); - AutoRepair.instance.setup(); + logger.info("Enable unified-repair scheduling"); + UnifiedRepair.instance.setup(); } - logger.info("AutoRepair setup complete!"); + logger.info("UnifiedRepair setup complete!"); } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/UnifiedRepairService.java similarity index 72% rename from src/java/org/apache/cassandra/service/AutoRepairService.java rename to src/java/org/apache/cassandra/service/UnifiedRepairService.java index 7a8342c2a7a4..489dd6873011 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/UnifiedRepairService.java @@ -20,9 +20,9 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; import org.apache.cassandra.utils.MBeanWrapper; import java.util.HashSet; @@ -32,23 +32,23 @@ import com.google.common.annotations.VisibleForTesting; -public class AutoRepairService implements AutoRepairServiceMBean +public class UnifiedRepairService implements UnifiedRepairServiceMBean { - public static final String MBEAN_NAME = "org.apache.cassandra.db:type=AutoRepairService"; + public static final String MBEAN_NAME = "org.apache.cassandra.db:type=UnifiedRepairService"; @VisibleForTesting - protected 
AutoRepairConfig config; + protected UnifiedRepairConfig config; - public static final AutoRepairService instance = new AutoRepairService(); + public static final UnifiedRepairService instance = new UnifiedRepairService(); @VisibleForTesting - protected AutoRepairService() + protected UnifiedRepairService() { } public static void setup() { - instance.config = DatabaseDescriptor.getAutoRepairConfig(); + instance.config = DatabaseDescriptor.getUnifiedRepairConfig(); } static @@ -58,8 +58,8 @@ public static void setup() public void checkCanRun(RepairType repairType) { - if (!config.isAutoRepairSchedulingEnabled()) - throw new ConfigurationException("Auto-repair scheduller is disabled."); + if (!config.isUnifiedRepairSchedulingEnabled()) + throw new ConfigurationException("Unified-repair scheduller is disabled."); if (repairType != RepairType.incremental) return; @@ -72,16 +72,16 @@ public void checkCanRun(RepairType repairType) } @Override - public AutoRepairConfig getAutoRepairConfig() + public UnifiedRepairConfig getUnifiedRepairConfig() { return config; } @Override - public void setAutoRepairEnabled(RepairType repairType, boolean enabled) + public void setUnifiedRepairEnabled(RepairType repairType, boolean enabled) { checkCanRun(repairType); - config.setAutoRepairEnabled(repairType, enabled); + config.setUnifiedRepairEnabled(repairType, enabled); } @Override @@ -93,18 +93,18 @@ public void setRepairThreads(RepairType repairType, int repairThreads) @Override public void setRepairPriorityForHosts(RepairType repairType, Set hosts) { - AutoRepairUtils.addPriorityHosts(repairType, hosts); + UnifiedRepairUtils.addPriorityHosts(repairType, hosts); } @Override public Set getRepairHostPriority(RepairType repairType) { - return AutoRepairUtils.getPriorityHosts(repairType); + return UnifiedRepairUtils.getPriorityHosts(repairType); } @Override public void setForceRepairForHosts(RepairType repairType, Set hosts) { - AutoRepairUtils.setForceRepair(repairType, hosts); + 
UnifiedRepairUtils.setForceRepair(repairType, hosts); } @Override @@ -125,19 +125,19 @@ public void startScheduler() config.startScheduler(); } - public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) + public void setUnifiedRepairHistoryClearDeleteHostsBufferDuration(String duration) { - config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); + config.setUnifiedRepairHistoryClearDeleteHostsBufferInterval(duration); } @Override - public void setAutoRepairMaxRetriesCount(int retries) + public void setUnifiedRepairMaxRetriesCount(int retries) { config.setRepairMaxRetries(retries); } @Override - public void setAutoRepairRetryBackoff(String interval) + public void setUnifiedRepairRetryBackoff(String interval) { config.setRepairRetryBackoff(interval); } @@ -149,9 +149,9 @@ public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssta } @Override - public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) + public void setUnifiedRepairTableMaxRepairTime(RepairType repairType, String unifiedRepairTableMaxRepairTime) { - config.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); + config.setUnifiedRepairTableMaxRepairTime(repairType, unifiedRepairTableMaxRepairTime); } @Override @@ -192,12 +192,12 @@ public void setRepairSessionTimeout(RepairType repairType, String timeout) public Set getOnGoingRepairHostIds(RepairType rType) { Set hostIds = new HashSet<>(); - List histories = AutoRepairUtils.getAutoRepairHistory(rType); + List histories = UnifiedRepairUtils.getUnifiedRepairHistory(rType); if (histories == null) { return hostIds; } - AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(rType)); + UnifiedRepairUtils.CurrentRepairStatus currentRepairStatus = new UnifiedRepairUtils.CurrentRepairStatus(histories, UnifiedRepairUtils.getPriorityHostIds(rType)); for 
(UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) { hostIds.add(id.toString()); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/UnifiedRepairServiceMBean.java similarity index 74% rename from src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java rename to src/java/org/apache/cassandra/service/UnifiedRepairServiceMBean.java index 121c9a480303..ec5a71bdcddd 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/UnifiedRepairServiceMBean.java @@ -18,17 +18,17 @@ package org.apache.cassandra.service; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; import java.util.Set; -public interface AutoRepairServiceMBean +public interface UnifiedRepairServiceMBean { /** - * Enable or disable auto-repair for a given repair type + * Enable or disable unified-repair for a given repair type */ - public void setAutoRepairEnabled(RepairType repairType, boolean enabled); + public void setUnifiedRepairEnabled(RepairType repairType, boolean enabled); public void setRepairThreads(RepairType repairType, int repairThreads); @@ -44,15 +44,15 @@ public interface AutoRepairServiceMBean void startScheduler(); - public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); + public void setUnifiedRepairHistoryClearDeleteHostsBufferDuration(String duration); - public void setAutoRepairMaxRetriesCount(int retries); + public void setUnifiedRepairMaxRetriesCount(int retries); - public void setAutoRepairRetryBackoff(String interval); + public void setUnifiedRepairRetryBackoff(String interval); public void 
setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); - public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); + public void setUnifiedRepairTableMaxRepairTime(RepairType repairType, String unifiedRepairTableMaxRepairTime); public void setIgnoreDCs(RepairType repairType, Set ignorDCs); @@ -64,7 +64,7 @@ public interface AutoRepairServiceMBean public void setMVRepairEnabled(RepairType repairType, boolean enabled); - public AutoRepairConfig getAutoRepairConfig(); + public UnifiedRepairConfig getUnifiedRepairConfig(); public void setRepairSessionTimeout(RepairType repairType, String timeout); diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 19d6cb50edae..78ee739dd452 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -40,7 +40,7 @@ import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordService; @@ -222,7 +222,7 @@ public SequenceState executeNext() return halted(); } // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); + UnifiedRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java 
index f3a3a8ff4577..f074e285df13 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java @@ -44,7 +44,7 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -219,7 +219,7 @@ public SequenceState executeNext() return halted(); } // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); + UnifiedRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index 1c55d84ca244..ab4d8ffd68cc 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -29,7 +29,7 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -87,7 +87,7 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s throw new IllegalStateException("Could not finish join for during replacement"); } // this 
node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); + UnifiedRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } if (finishJoiningRing) diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 5417c127e5e0..e495ef781c38 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -113,10 +113,10 @@ import org.apache.cassandra.metrics.ThreadPoolMetrics; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.MessagingServiceMBean; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.apache.cassandra.service.ActiveRepairServiceMBean; -import org.apache.cassandra.service.AutoRepairService; -import org.apache.cassandra.service.AutoRepairServiceMBean; +import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.service.UnifiedRepairServiceMBean; import org.apache.cassandra.service.CacheService; import org.apache.cassandra.service.CacheServiceMBean; import org.apache.cassandra.service.snapshot.SnapshotManagerMBean; @@ -184,7 +184,7 @@ public class NodeProbe implements AutoCloseable protected CIDRGroupsMappingManagerMBean cmbProxy; protected PermissionsCacheMBean pcProxy; protected RolesCacheMBean rcProxy; - protected AutoRepairServiceMBean autoRepairProxy; + protected UnifiedRepairServiceMBean unifiedRepairProxy; protected Output output; private boolean failed; @@ -331,8 +331,8 @@ protected void connect() throws IOException name = new ObjectName(CIDRFilteringMetricsTable.MBEAN_NAME); cfmProxy = JMX.newMBeanProxy(mbeanServerConn, name, CIDRFilteringMetricsTableMBean.class); - name = new ObjectName(AutoRepairService.MBEAN_NAME); - autoRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name, 
AutoRepairServiceMBean.class); + name = new ObjectName(UnifiedRepairService.MBEAN_NAME); + unifiedRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name, UnifiedRepairServiceMBean.class); } catch (MalformedObjectNameException e) { @@ -2564,95 +2564,95 @@ public void abortBootstrap(String nodeId, String endpoint) ssProxy.abortBootstrap(nodeId, endpoint); } - public AutoRepairConfig getAutoRepairConfig() { - return autoRepairProxy.getAutoRepairConfig(); + public UnifiedRepairConfig getUnifiedRepairConfig() { + return unifiedRepairProxy.getUnifiedRepairConfig(); } - public void setAutoRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + public void setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType repairType, boolean enabled) { - autoRepairProxy.setAutoRepairEnabled(repairType, enabled); + unifiedRepairProxy.setUnifiedRepairEnabled(repairType, enabled); } - public void setRepairThreads(AutoRepairConfig.RepairType repairType, int repairThreads) + public void setRepairThreads(UnifiedRepairConfig.RepairType repairType, int repairThreads) { - autoRepairProxy.setRepairThreads(repairType, repairThreads); + unifiedRepairProxy.setRepairThreads(repairType, repairThreads); } - public void setRepairPriorityForHosts(AutoRepairConfig.RepairType repairType, Set hosts) + public void setRepairPriorityForHosts(UnifiedRepairConfig.RepairType repairType, Set hosts) { - autoRepairProxy.setRepairPriorityForHosts(repairType, hosts); + unifiedRepairProxy.setRepairPriorityForHosts(repairType, hosts); } - public Set getRepairPriorityForHosts(AutoRepairConfig.RepairType repairType) + public Set getRepairPriorityForHosts(UnifiedRepairConfig.RepairType repairType) { - return autoRepairProxy.getRepairHostPriority(repairType); + return unifiedRepairProxy.getRepairHostPriority(repairType); } - public void setForceRepairForHosts(AutoRepairConfig.RepairType repairType, Set hosts){ - autoRepairProxy.setForceRepairForHosts(repairType, hosts); + public void 
setForceRepairForHosts(UnifiedRepairConfig.RepairType repairType, Set hosts){ + unifiedRepairProxy.setForceRepairForHosts(repairType, hosts); } - public void setRepairSubRangeNum(AutoRepairConfig.RepairType repairType, int repairSubRanges) + public void setRepairSubRangeNum(UnifiedRepairConfig.RepairType repairType, int repairSubRanges) { - autoRepairProxy.setRepairSubRangeNum(repairType, repairSubRanges); + unifiedRepairProxy.setRepairSubRangeNum(repairType, repairSubRanges); } - public void setRepairMinInterval(AutoRepairConfig.RepairType repairType, String minRepairInterval) + public void setRepairMinInterval(UnifiedRepairConfig.RepairType repairType, String minRepairInterval) { - autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); + unifiedRepairProxy.setRepairMinInterval(repairType, minRepairInterval); } - public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) + public void setUnifiedRepairHistoryClearDeleteHostsBufferDuration(String duration) { - autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); + unifiedRepairProxy.setUnifiedRepairHistoryClearDeleteHostsBufferDuration(duration); } public void startScheduler() { - autoRepairProxy.startScheduler(); + unifiedRepairProxy.startScheduler(); } - public void setAutoRepairMaxRetriesCount(int retries) + public void setUnifiedRepairMaxRetriesCount(int retries) { - autoRepairProxy.setAutoRepairMaxRetriesCount(retries); + unifiedRepairProxy.setUnifiedRepairMaxRetriesCount(retries); } - public void setAutoRepairRetryBackoff(String interval) + public void setUnifiedRepairRetryBackoff(String interval) { - autoRepairProxy.setAutoRepairRetryBackoff(interval); + unifiedRepairProxy.setUnifiedRepairRetryBackoff(interval); } - public void setRepairSSTableCountHigherThreshold(AutoRepairConfig.RepairType repairType, int ssTableHigherThreshold) + public void setRepairSSTableCountHigherThreshold(UnifiedRepairConfig.RepairType repairType, int ssTableHigherThreshold) 
{ - autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); + unifiedRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); } - public void setAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType repairType, String autoRepairTableMaxRepairTime) + public void setUnifiedRepairTableMaxRepairTime(UnifiedRepairConfig.RepairType repairType, String unifiedRepairTableMaxRepairTime) { - autoRepairProxy.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); + unifiedRepairProxy.setUnifiedRepairTableMaxRepairTime(repairType, unifiedRepairTableMaxRepairTime); } - public void setAutoRepairIgnoreDCs(AutoRepairConfig.RepairType repairType, Set ignoreDCs) + public void setUnifiedRepairIgnoreDCs(UnifiedRepairConfig.RepairType repairType, Set ignoreDCs) { - autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); + unifiedRepairProxy.setIgnoreDCs(repairType, ignoreDCs); } - public void setParallelRepairPercentage(AutoRepairConfig.RepairType repairType, int percentage) { - autoRepairProxy.setParallelRepairPercentage(repairType, percentage); + public void setParallelRepairPercentage(UnifiedRepairConfig.RepairType repairType, int percentage) { + unifiedRepairProxy.setParallelRepairPercentage(repairType, percentage); } - public void setParallelRepairCount(AutoRepairConfig.RepairType repairType, int count) { - autoRepairProxy.setParallelRepairCount(repairType, count); + public void setParallelRepairCount(UnifiedRepairConfig.RepairType repairType, int count) { + unifiedRepairProxy.setParallelRepairCount(repairType, count); } - public void setPrimaryTokenRangeOnly(AutoRepairConfig.RepairType repairType, boolean primaryTokenRangeOnly) + public void setPrimaryTokenRangeOnly(UnifiedRepairConfig.RepairType repairType, boolean primaryTokenRangeOnly) { - autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); + unifiedRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); } 
- public void setMVRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + public void setMVRepairEnabled(UnifiedRepairConfig.RepairType repairType, boolean enabled) { - autoRepairProxy.setMVRepairEnabled(repairType, enabled); + unifiedRepairProxy.setMVRepairEnabled(repairType, enabled); } public List mutateSSTableRepairedState(boolean repair, boolean preview, String keyspace, List tables) throws InvalidRequestException @@ -2664,14 +2664,14 @@ public List getTablesForKeyspace(String keyspace) { return ssProxy.getTablesForKeyspace(keyspace); } - public void setRepairSessionTimeout(AutoRepairConfig.RepairType repairType, String timeout) + public void setRepairSessionTimeout(UnifiedRepairConfig.RepairType repairType, String timeout) { - autoRepairProxy.setRepairSessionTimeout(repairType, timeout); + unifiedRepairProxy.setRepairSessionTimeout(repairType, timeout); } - public Set getOnGoingRepairHostIds(AutoRepairConfig.RepairType type) + public Set getOnGoingRepairHostIds(UnifiedRepairConfig.RepairType type) { - return autoRepairProxy.getOnGoingRepairHostIds(type); + return unifiedRepairProxy.getOnGoingRepairHostIds(type); } } diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 3e025df1652e..42a23ffee213 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -94,7 +94,6 @@ public int execute(String... args) AbortBootstrap.class, AlterTopology.class, Assassinate.class, - AutoRepairStatus.class, CassHelp.class, CIDRFilteringStats.class, Cleanup.class, @@ -134,7 +133,6 @@ public int execute(String... args) GcStats.class, GetAuditLog.class, GetAuthCacheConfig.class, - GetAutoRepairConfig.class, GetBatchlogReplayTrottle.class, GetCIDRGroupsOfIP.class, GetColumnIndexSize.class, @@ -155,6 +153,7 @@ public int execute(String... 
args) GetStreamThroughput.class, GetTimeout.class, GetTraceProbability.class, + GetUnifiedRepairConfig.class, GossipInfo.class, Import.class, Info.class, @@ -199,7 +198,6 @@ public int execute(String... args) Ring.class, Scrub.class, SetAuthCacheConfig.class, - SetAutoRepairConfig.class, SetBatchlogReplayThrottle.class, SetCacheCapacity.class, SetCacheKeysToSave.class, @@ -218,6 +216,7 @@ public int execute(String... args) SetStreamThroughput.class, SetTimeout.class, SetTraceProbability.class, + SetUnifiedRepairConfig.class, Sjk.class, Snapshot.class, SSTableRepairedSet.class, @@ -234,6 +233,7 @@ public int execute(String... args) TopPartitions.class, TpStats.class, TruncateHints.class, + UnifiedRepairStatus.class, UpdateCIDRGroup.class, UpgradeSSTable.class, Verify.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetUnifiedRepairConfig.java similarity index 82% rename from src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java rename to src/java/org/apache/cassandra/tools/nodetool/GetUnifiedRepairConfig.java index 48f8d54de3f6..336063eb2405 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetUnifiedRepairConfig.java @@ -20,15 +20,15 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import io.airlift.airline.Command; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; import java.io.PrintStream; -@Command(name = "getautorepairconfig", description = "Print autorepair configurations") -public class 
GetAutoRepairConfig extends NodeToolCmd +@Command(name = "getunifiedrepairconfig", description = "Print unifiedrepair configurations") +public class GetUnifiedRepairConfig extends NodeToolCmd { @VisibleForTesting protected static PrintStream out = System.out; @@ -36,17 +36,17 @@ public class GetAutoRepairConfig extends NodeToolCmd @Override public void execute(NodeProbe probe) { - AutoRepairConfig config = probe.getAutoRepairConfig(); - if (config == null || !config.isAutoRepairSchedulingEnabled()) + UnifiedRepairConfig config = probe.getUnifiedRepairConfig(); + if (config == null || !config.isUnifiedRepairSchedulingEnabled()) { - out.println("Auto-repair is not enabled"); + out.println("Unified-repair is not enabled"); return; } StringBuilder sb = new StringBuilder(); sb.append("repair scheduler configuration:"); sb.append("\n\trepair eligibility check interval: " + config.getRepairCheckInterval()); - sb.append("\n\tTTL for repair history for dead nodes: " + config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); + sb.append("\n\tTTL for repair history for dead nodes: " + config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval()); sb.append("\n\tmax retries for repair: " + config.getRepairMaxRetries()); sb.append("\n\tretry backoff: " + config.getRepairRetryBackoff()); for (RepairType repairType : RepairType.values()) @@ -57,17 +57,17 @@ public void execute(NodeProbe probe) out.println(sb); } - private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, AutoRepairConfig config) + private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, UnifiedRepairConfig config) { StringBuilder sb = new StringBuilder(); sb.append("\nconfiguration for repair type: " + repairType); - sb.append("\n\tenabled: " + config.isAutoRepairEnabled(repairType)); + sb.append("\n\tenabled: " + config.isUnifiedRepairEnabled(repairType)); sb.append("\n\tminimum repair interval: " + config.getRepairMinInterval(repairType)); sb.append("\n\trepair 
threads: " + config.getRepairThreads(repairType)); sb.append("\n\tnumber of repair subranges: " + config.getRepairSubRangeNum(repairType)); sb.append("\n\tpriority hosts: " + Joiner.on(',').skipNulls().join(probe.getRepairPriorityForHosts(repairType))); sb.append("\n\tsstable count higher threshold: " + config.getRepairSSTableCountHigherThreshold(repairType)); - sb.append("\n\ttable max repair time in sec: " + config.getAutoRepairTableMaxRepairTime(repairType)); + sb.append("\n\ttable max repair time in sec: " + config.getUnifiedRepairTableMaxRepairTime(repairType)); sb.append("\n\tignore datacenters: " + Joiner.on(',').skipNulls().join(config.getIgnoreDCs(repairType))); sb.append("\n\trepair primary token-range: " + config.getRepairPrimaryTokenRangeOnly(repairType)); sb.append("\n\tnumber of parallel repairs within group: " + config.getParallelRepairCount(repairType)); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfig.java similarity index 83% rename from src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java rename to src/java/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfig.java index 2929c944442a..fe123c3de1ac 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfig.java @@ -24,7 +24,7 @@ import io.airlift.airline.Command; import io.airlift.airline.Option; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; @@ -37,12 +37,12 @@ import static com.google.common.base.Preconditions.checkArgument; -@Command(name = "setautorepairconfig", description = "sets the autorepair 
configuration") -public class SetAutoRepairConfig extends NodeToolCmd +@Command(name = "setunifiedrepairconfig", description = "sets the unifiedrepair configuration") +public class SetUnifiedRepairConfig extends NodeToolCmd { @VisibleForTesting - @Arguments(title = " ", usage = " ", - description = "autorepair param and value.\nPossible autorepair parameters are as following: " + + @Arguments(title = " ", usage = " ", + description = "unifiedrepair param and value.\nPossible unifiedrepair parameters are as following: " + "[start_scheduler|number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + @@ -60,13 +60,13 @@ public class SetAutoRepairConfig extends NodeToolCmd @Override public void execute(NodeProbe probe) { - checkArgument(args.size() == 2, "setautorepairconfig requires param-type, and value args."); + checkArgument(args.size() == 2, "setunifiedrepairconfig requires param-type, and value args."); String paramType = args.get(0); String paramVal = args.get(1); - if (!probe.getAutoRepairConfig().isAutoRepairSchedulingEnabled() && !paramType.equalsIgnoreCase("start_scheduler")) + if (!probe.getUnifiedRepairConfig().isUnifiedRepairSchedulingEnabled() && !paramType.equalsIgnoreCase("start_scheduler")) { - out.println("Auto-repair is not enabled"); + out.println("Unified-repair is not enabled"); return; } @@ -80,13 +80,13 @@ public void execute(NodeProbe probe) } return; case "history_clear_delete_hosts_buffer_interval": - probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); + probe.setUnifiedRepairHistoryClearDeleteHostsBufferDuration(paramVal); return; case "repair_max_retries": - probe.setAutoRepairMaxRetriesCount(Integer.parseInt(paramVal)); + probe.setUnifiedRepairMaxRetriesCount(Integer.parseInt(paramVal)); return; case "repair_retry_backoff": - 
probe.setAutoRepairRetryBackoff(paramVal); + probe.setUnifiedRepairRetryBackoff(paramVal); return; default: // proceed to options that require --repair-type option @@ -99,7 +99,7 @@ public void execute(NodeProbe probe) switch (paramType) { case "enabled": - probe.setAutoRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + probe.setUnifiedRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); break; case "number_of_repair_threads": probe.setRepairThreads(repairType, Integer.parseInt(paramVal)); @@ -114,7 +114,7 @@ public void execute(NodeProbe probe) probe.setRepairSSTableCountHigherThreshold(repairType, Integer.parseInt(paramVal)); break; case "table_max_repair_time": - probe.setAutoRepairTableMaxRepairTime(repairType, paramVal); + probe.setUnifiedRepairTableMaxRepairTime(repairType, paramVal); break; case "priority_hosts": hosts = retrieveHosts(paramVal); @@ -136,7 +136,7 @@ public void execute(NodeProbe probe) { ignoreDCs.add(dc); } - probe.setAutoRepairIgnoreDCs(repairType, ignoreDCs); + probe.setUnifiedRepairIgnoreDCs(repairType, ignoreDCs); break; case "repair_primary_token_range_only": probe.setPrimaryTokenRangeOnly(repairType, Boolean.parseBoolean(paramVal)); diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/UnifiedRepairStatus.java similarity index 82% rename from src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java rename to src/java/org/apache/cassandra/tools/nodetool/UnifiedRepairStatus.java index 7b96102c6698..b40fe887e5d8 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/UnifiedRepairStatus.java @@ -25,19 +25,19 @@ import io.airlift.airline.Command; import io.airlift.airline.Option; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.apache.cassandra.tools.NodeProbe; import 
org.apache.cassandra.tools.NodeTool; import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; import static com.google.common.base.Preconditions.checkArgument; -@Command(name = "autorepairstatus", description = "Print autorepair status") -public class AutoRepairStatus extends NodeTool.NodeToolCmd +@Command(name = "unifiedrepairstatus", description = "Print unifiedrepair status") +public class UnifiedRepairStatus extends NodeTool.NodeToolCmd { @VisibleForTesting @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") - protected AutoRepairConfig.RepairType repairType; + protected UnifiedRepairConfig.RepairType repairType; @Override public void execute(NodeProbe probe) @@ -45,10 +45,10 @@ public void execute(NodeProbe probe) checkArgument(repairType != null, "--repair-type is required."); PrintStream out = probe.output().out; - AutoRepairConfig config = probe.getAutoRepairConfig(); - if (config == null || !config.isAutoRepairSchedulingEnabled()) + UnifiedRepairConfig config = probe.getUnifiedRepairConfig(); + if (config == null || !config.isUnifiedRepairSchedulingEnabled()) { - out.println("Auto-repair is not enabled"); + out.println("Unified-repair is not enabled"); return; } diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 2be783fe4a9d..14a34515dfa5 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -68,7 +68,7 @@ import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter; +import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -732,11 +732,11 @@ public static AbstractCryptoProvider newCryptoProvider(String className, Map 
parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); // first attempt to initialize with Map arguments. - return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(Map.class).newInstance(parameters); + return (IUnifiedRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(Map.class).newInstance(parameters); } catch (NoSuchMethodException nsme) { // fall back on no argument constructor. - return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); + return (IUnifiedRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); } } catch (Exception ex) { - throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter for " + className, ex); + throw new ConfigurationException("Unable to create instance of IUnifiedRepairTokenRangeSplitter for " + className, ex); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/UnifiedRepairSchedulerTest.java similarity index 83% rename from test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java rename to test/distributed/org/apache/cassandra/distributed/test/repair/UnifiedRepairSchedulerTest.java index 9b583f006507..8460ec119cee 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/UnifiedRepairSchedulerTest.java @@ -35,14 +35,14 @@ import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.test.TestBaseImpl; -import org.apache.cassandra.repair.autorepair.AutoRepair; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.service.AutoRepairService; +import 
org.apache.cassandra.repair.unifiedrepair.UnifiedRepair; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.service.UnifiedRepairService; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.junit.Assert.assertEquals; -public class AutoRepairSchedulerTest extends TestBaseImpl +public class UnifiedRepairSchedulerTest extends TestBaseImpl { private static Cluster cluster; @@ -57,25 +57,25 @@ public static void init() throws IOException sdf = new SimpleDateFormat(pattern); sdf.setLenient(false); cluster = Cluster.build(3).withConfig(config -> config - .set("auto_repair", + .set("unified_repair", ImmutableMap.of( "repair_type_overrides", - ImmutableMap.of(AutoRepairConfig.RepairType.full.toString(), - ImmutableMap.of( + ImmutableMap.of(UnifiedRepairConfig.RepairType.full.toString(), + ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", "parallel_repair_count", "1", "parallel_repair_percentage", "0", "min_repair_interval", "1s"), - AutoRepairConfig.RepairType.incremental.toString(), - ImmutableMap.of( + UnifiedRepairConfig.RepairType.incremental.toString(), + ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", "parallel_repair_count", "1", "parallel_repair_percentage", "0", "min_repair_interval", "1s")))) - .set("auto_repair.enabled", "true") - .set("auto_repair.repair_check_interval", "10s")).start(); + .set("unified_repair.enabled", "true") + .set("unified_repair.repair_check_interval", "10s")).start(); cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); @@ -85,7 +85,7 @@ public static void init() throws IOException public void testScheduler() throws ParseException { // ensure there was no history of previous 
repair runs through the scheduler - Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY), ConsistencyLevel.QUORUM); + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY), ConsistencyLevel.QUORUM); assertEquals(0, rows.length); cluster.forEach(i -> i.runOnInstance(() -> { @@ -93,9 +93,9 @@ public void testScheduler() throws ParseException { DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - AutoRepairService.instance.setup(); + UnifiedRepairService.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(false); - AutoRepair.instance.setup(); + UnifiedRepair.instance.setup(); } catch (Exception e) { @@ -105,13 +105,13 @@ public void testScheduler() throws ParseException // wait for a couple of minutes for repair to go through on all three nodes Uninterruptibles.sleepUninterruptibly(2, TimeUnit.MINUTES); - validate(AutoRepairConfig.RepairType.full.toString()); - validate(AutoRepairConfig.RepairType.incremental.toString()); + validate(UnifiedRepairConfig.RepairType.full.toString()); + validate(UnifiedRepairConfig.RepairType.incremental.toString()); } private void validate(String repairType) throws ParseException { - Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where 
repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); assertEquals(3, rows.length); for (int node = 0; node < rows.length; node++) { diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 0634d246d088..7177cad467c1 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -57,6 +57,14 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; +import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; +import org.apache.cassandra.gms.ApplicationState; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.gms.VersionedValue; +import org.apache.cassandra.io.util.File; + +import org.apache.cassandra.config.Config; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.slf4j.Logger; @@ -132,7 +140,6 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaCollection; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; @@ -1362,11 +1369,11 @@ public static Map listSnapshots(ColumnFamilyStore cfs) return tagSnapshotsMap; } - // Replaces the global auto-repair config with a new config where auto-repair schedulling is enabled/disabled - public static void setAutoRepairEnabled(boolean enabled) throws Exception + // Replaces the global unified-repair config with a new config where unified-repair schedulling is enabled/disabled + public static void setUnifiedRepairEnabled(boolean enabled) throws Exception { Config config = DatabaseDescriptor.getRawConfig(); - config.auto_repair = new AutoRepairConfig(enabled); + 
config.unified_repair = new UnifiedRepairConfig(enabled); Field configField = DatabaseDescriptor.class.getDeclaredField("conf"); configField.setAccessible(true); configField.set(null, config); diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index d62e88091635..2c96f2c4602b 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -114,14 +114,14 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$CorruptedTombstoneStrategy", "org.apache.cassandra.config.Config$BatchlogEndpointStrategy", "org.apache.cassandra.config.Config$TombstonesMetricGranularity", - "org.apache.cassandra.repair.autorepair.AutoRepairConfig", - "org.apache.cassandra.repair.autorepair.AutoRepairConfig$Options", - "org.apache.cassandra.repair.autorepair.DefaultAutoRepairTokenSplitter", - "org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter", - "org.apache.cassandra.repair.autorepair.FullRepairState", - "org.apache.cassandra.repair.autorepair.IncrementalRepairState", - "org.apache.cassandra.repair.autorepair.AutoRepairConfig$RepairType", - "org.apache.cassandra.repair.autorepair.AutoRepairState", + "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig", + "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig$Options", + "org.apache.cassandra.repair.unifiedrepair.DefaultUnifiedRepairTokenSplitter", + "org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter", + "org.apache.cassandra.repair.unifiedrepair.FullRepairState", + "org.apache.cassandra.repair.unifiedrepair.IncrementalRepairState", + "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig$RepairType", + "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairState", "org.apache.cassandra.config.DatabaseDescriptor$ByteUnit", 
"org.apache.cassandra.config.DataRateSpec", "org.apache.cassandra.config.DataRateSpec$DataRateUnit", diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 9a0203c301bd..0f1834d94f10 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -46,7 +46,7 @@ import static org.apache.cassandra.config.YamlConfigurationLoader.SYSTEM_PROPERTY_PREFIX; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -277,7 +277,7 @@ public void fromMapTest() Map encryptionOptions = ImmutableMap.of("cipher_suites", Collections.singletonList("FakeCipher"), "optional", false, "enabled", true); - Map autoRepairConfig = ImmutableMap.of("enabled", true, + Map unifiedRepairConfig = ImmutableMap.of("enabled", true, "global_settings", ImmutableMap.of("number_of_repair_threads", 1), "repair_type_overrides", ImmutableMap.of( @@ -291,7 +291,7 @@ public void fromMapTest() .put("internode_socket_send_buffer_size", "5B") .put("internode_socket_receive_buffer_size", "5B") .put("commitlog_sync_group_window_in_ms", "42") - .put("auto_repair", autoRepairConfig) + .put("unified_repair", unifiedRepairConfig) .build(); Config config = YamlConfigurationLoader.fromMap(map, Config.class); @@ -302,9 +302,9 @@ public void fromMapTest() assertEquals(true, config.client_encryption_options.enabled); // Check a nested object assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new 
DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) - assertEquals(true, config.auto_repair.enabled); - assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.incremental)); - config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.incremental, false); + assertEquals(true, config.unified_repair.enabled); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.unified_repair.getUnifiedRepairTableMaxRepairTime(UnifiedRepairConfig.RepairType.incremental)); + config.unified_repair.setMVRepairEnabled(UnifiedRepairConfig.RepairType.incremental, false); } @Test @@ -565,4 +565,4 @@ public static Config load(String path) } return new YamlConfigurationLoader().loadConfig(url); } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/SSTableRepairedAtTest.java similarity index 97% rename from test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/SSTableRepairedAtTest.java index bd14eea805b3..14011f85896d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/SSTableRepairedAtTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.net.UnknownHostException; import java.util.Arrays; @@ -55,8 +55,8 @@ public class SSTableRepairedAtTest extends CQLTester public static void setUp() throws ConfigurationException, UnknownHostException { requireNetwork(); - AutoRepairUtils.setup(); - StorageService.instance.doAutoRepairSetup(); + UnifiedRepairUtils.setup(); + StorageService.instance.doUnifiedRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfigTest.java similarity index 77% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfigTest.java index 4f1dd029ef5e..e7ce54a1cea5 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfigTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.EnumMap; import java.util.Objects; @@ -35,7 +35,7 @@ import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.Options; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.Options; import org.apache.cassandra.utils.FBUtilities; import static org.junit.Assert.assertEquals; @@ -44,79 +44,79 @@ import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) -public class AutoRepairConfigTest extends CQLTester +public class UnifiedRepairConfigTest extends CQLTester { - private AutoRepairConfig config; + private UnifiedRepairConfig config; private Set testSet = ImmutableSet.of("dc1"); @Parameterized.Parameter - public AutoRepairConfig.RepairType repairType; + public UnifiedRepairConfig.RepairType repairType; @Parameterized.Parameters public static Object[] repairTypes() { - return AutoRepairConfig.RepairType.values(); + return UnifiedRepairConfig.RepairType.values(); } @Before public void setUp() { - config = new AutoRepairConfig(true); + config = new UnifiedRepairConfig(true); config.repair_type_overrides = null; - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); } @Test - public void autoRepairConfigDefaultsAreNotNull() + public void unifiedRepairConfigDefaultsAreNotNull() { - AutoRepairConfig config = new AutoRepairConfig(); + UnifiedRepairConfig config = new UnifiedRepairConfig(); assertNotNull(config.global_settings); } @Test - public void autoRepairConfigRepairTypesAreNotNull() + public void unifiedRepairConfigRepairTypesAreNotNull() { - AutoRepairConfig config = new AutoRepairConfig(); - for (AutoRepairConfig.RepairType 
repairType : AutoRepairConfig.RepairType.values()) + UnifiedRepairConfig config = new UnifiedRepairConfig(); + for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) { assertNotNull(config.repair_type_overrides.get(repairType)); } } @Test - public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsEnabled() + public void testIsUnifiedRepairEnabledReturnsTrueWhenRepairIsEnabled() { config.global_settings.enabled = true; - assertTrue(config.isAutoRepairEnabled(repairType)); + assertTrue(config.isUnifiedRepairEnabled(repairType)); } @Test - public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() + public void testIsUnifiedRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() { - config = new AutoRepairConfig(false); + config = new UnifiedRepairConfig(false); config.global_settings.enabled = true; - assertFalse(config.isAutoRepairEnabled(repairType)); + assertFalse(config.isUnifiedRepairEnabled(repairType)); } @Test - public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() + public void testIsUnifiedRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() { config.global_settings.enabled = true; - config.repair_type_overrides = new EnumMap<>(AutoRepairConfig.RepairType.class); + config.repair_type_overrides = new EnumMap<>(UnifiedRepairConfig.RepairType.class); config.repair_type_overrides.put(repairType, new Options()); config.repair_type_overrides.get(repairType).enabled = false; - assertFalse(config.isAutoRepairEnabled(repairType)); + assertFalse(config.isUnifiedRepairEnabled(repairType)); } @Test - public void testSetAutoRepairEnabledNoMVOrCDC() + public void testSetUnifiedRepairEnabledNoMVOrCDC() { DatabaseDescriptor.setCDCEnabled(false); DatabaseDescriptor.setMaterializedViewsEnabled(false); - config.setAutoRepairEnabled(repairType, true); + config.setUnifiedRepairEnabled(repairType, true); assertTrue(config.repair_type_overrides.get(repairType).enabled); } @@ -194,19 +194,19 @@ 
public void testSetRepairMinFrequencyInHours() } @Test - public void testGetAutoRepairHistoryClearDeleteHostsBufferInSec() + public void testGetUnifiedRepairHistoryClearDeleteHostsBufferInSec() { config.history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("5s"); - int result = config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds(); + int result = config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval().toSeconds(); assertEquals(5, result); } @Test - public void testSetAutoRepairHistoryClearDeleteHostsBufferInSec() + public void testSetUnifiedRepairHistoryClearDeleteHostsBufferInSec() { - config.setAutoRepairHistoryClearDeleteHostsBufferInterval("5s"); + config.setUnifiedRepairHistoryClearDeleteHostsBufferInterval("5s"); assert Objects.equals(config.history_clear_delete_hosts_buffer_interval, new DurationSpec.IntSecondsBound("5s")); } @@ -230,19 +230,19 @@ public void testSetRepairSSTableCountHigherThreshold() } @Test - public void testGetAutoRepairTableMaxRepairTimeInSec() + public void testGetUnifiedRepairTableMaxRepairTimeInSec() { config.global_settings.table_max_repair_time = new DurationSpec.IntSecondsBound("5s"); - DurationSpec.IntSecondsBound result = config.getAutoRepairTableMaxRepairTime(repairType); + DurationSpec.IntSecondsBound result = config.getUnifiedRepairTableMaxRepairTime(repairType); assertEquals(5, result.toSeconds()); } @Test - public void testSetAutoRepairTableMaxRepairTimeInSec() + public void testSetUnifiedRepairTableMaxRepairTimeInSec() { - config.setAutoRepairTableMaxRepairTime(repairType, "5s"); + config.setUnifiedRepairTableMaxRepairTime(repairType, "5s"); assert config.repair_type_overrides.get(repairType).table_max_repair_time.toSeconds() == 5; } @@ -356,19 +356,19 @@ public void testGetForceRepairNewNode() } @Test - public void testIsAutoRepairSchedulingEnabledDefault() + public void testIsUnifiedRepairSchedulingEnabledDefault() { - config = new AutoRepairConfig(); + config = new 
UnifiedRepairConfig(); - boolean result = config.isAutoRepairSchedulingEnabled(); + boolean result = config.isUnifiedRepairSchedulingEnabled(); assertFalse(result); } @Test - public void testIsAutoRepairSchedulingEnabledTrue() + public void testIsUnifiedRepairSchedulingEnabledTrue() { - boolean result = config.isAutoRepairSchedulingEnabled(); + boolean result = config.isUnifiedRepairSchedulingEnabled(); assertTrue(result); } @@ -386,16 +386,16 @@ public void testGetDefaultOptionsTokenRangeSplitter() { Options defaultOptions = Options.getDefaultOptions(); - ParameterizedClass expectedDefault = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); + ParameterizedClass expectedDefault = new ParameterizedClass(DefaultUnifiedRepairTokenSplitter.class.getName(), Collections.emptyMap()); assertEquals(expectedDefault, defaultOptions.token_range_splitter); - assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); + assertEquals(DefaultUnifiedRepairTokenSplitter.class.getName(), FBUtilities.newUnifiedRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); } @Test(expected = ConfigurationException.class) public void testInvalidTokenRangeSplitter() { - FBUtilities.newAutoRepairTokenRangeSplitter(new ParameterizedClass("invalid-class", Collections.emptyMap())); + FBUtilities.newUnifiedRepairTokenRangeSplitter(new ParameterizedClass("invalid-class", Collections.emptyMap())); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java similarity index 84% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java rename to 
test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java index 804d0a712b48..f14f6acefeac 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.ArrayList; import java.util.Arrays; @@ -37,8 +37,8 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -48,7 +48,7 @@ import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) -public class AutoRepairDefaultTokenSplitterParameterizedTest +public class UnifiedRepairDefaultTokenSplitterParameterizedTest { private static final String KEYSPACE = "ks"; private static final String TABLE1 = "tbl1"; @@ -56,12 +56,12 @@ public class AutoRepairDefaultTokenSplitterParameterizedTest private static final String TABLE3 = "tbl3"; @Parameterized.Parameter() - public AutoRepairConfig.RepairType repairType; + public UnifiedRepairConfig.RepairType repairType; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + public static Collection repairTypes() { - return Arrays.asList(AutoRepairConfig.RepairType.values()); + return Arrays.asList(UnifiedRepairConfig.RepairType.values()); } @BeforeClass @@ -83,7 
+83,7 @@ public static void setupClass() throws Exception ServerTestUtils.registerLocal(tokens); // Ensure that the on-disk format statics are loaded before the test run Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); + StorageService.instance.doUnifiedRepairSetup(); SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); @@ -111,7 +111,7 @@ private static void appendExpectedTokens(long left, long right, int numberOfSpli @Test public void testTokenRangesSplitByTable() { - AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); + UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairByKeyspace(repairType, false); int totalTokenRanges = 3; Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); @@ -125,9 +125,9 @@ public void testTokenRangesSplitByTable() appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); } - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); + List assignments = new DefaultUnifiedRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); @@ -146,7 +146,7 @@ public void testTokenRangesSplitByTable() @Test public void testTokenRangesSplitByKeyspace() { - AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); + UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairByKeyspace(repairType, 
true); int totalTokenRanges = 3; Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); @@ -157,9 +157,9 @@ public void testTokenRangesSplitByKeyspace() appendExpectedTokens(0, 256, numberOfSplits, expectedToken); appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); + List assignments = new DefaultUnifiedRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairKeyspaceTest.java similarity index 87% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairKeyspaceTest.java index 1337cf3dd2d3..748cc88b2a2e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairKeyspaceTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.HashSet; import java.util.Iterator; @@ -29,7 +29,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; -public class AutoRepairKeyspaceTest +public class UnifiedRepairKeyspaceTest { @BeforeClass public static void setupDatabaseDescriptor() @@ -38,7 +38,7 @@ public static void setupDatabaseDescriptor() } @Test - public void testEnsureAutoRepairTablesArePresent() + public void testEnsureUnifiedRepairTablesArePresent() { KeyspaceMetadata keyspaceMetadata = SystemDistributedKeyspace.metadata(); Iterator iter = keyspaceMetadata.tables.iterator(); @@ -48,7 +48,7 @@ public void testEnsureAutoRepairTablesArePresent() actualDistributedTablesIter.add(iter.next().name); } - Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); - Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY)); + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY)); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairParameterizedTest.java similarity index 52% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairParameterizedTest.java index c7533c6dba0e..b080cede0c9c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairParameterizedTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.ArrayList; import java.util.Arrays; @@ -37,7 +37,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; @@ -56,20 +56,20 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.metrics.AutoRepairMetricsManager; -import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.UnifiedRepairMetricsManager; +import org.apache.cassandra.metrics.UnifiedRepairMetrics; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.utils.FBUtilities; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; -import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.setUnifiedRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.NOT_MY_TURN; import static org.junit.Assert.assertEquals; import static 
org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -82,43 +82,43 @@ import static org.mockito.Mockito.when; @RunWith(Parameterized.class) -public class AutoRepairParameterizedTest extends CQLTester +public class UnifiedRepairParameterizedTest extends CQLTester { private static final String KEYSPACE = "ks"; private static final String TABLE = "tbl"; - private static final String TABLE_DISABLED_AUTO_REPAIR = "tbl_disabled_auto_repair"; + private static final String TABLE_DISABLED_UNIFIED_REPAIR = "tbl_disabled_unified_repair"; private static final String MV = "mv"; private static TableMetadata cfm; - private static TableMetadata cfmDisabledAutoRepair; + private static TableMetadata cfmDisabledUnifiedRepair; private static Keyspace keyspace; private static int timeFuncCalls; @Mock ScheduledExecutorPlus mockExecutor; @Mock - AutoRepairState autoRepairState; + UnifiedRepairState unifiedRepairState; @Mock RepairCoordinator repairRunnable; - private static AutoRepairConfig defaultConfig; + private static UnifiedRepairConfig defaultConfig; @Parameterized.Parameter() - public AutoRepairConfig.RepairType repairType; + public UnifiedRepairConfig.RepairType repairType; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + public static Collection repairTypes() { - return Arrays.asList(AutoRepairConfig.RepairType.values()); + return Arrays.asList(UnifiedRepairConfig.RepairType.values()); } @BeforeClass public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); - setAutoRepairEnabled(true); + UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + setUnifiedRepairEnabled(true); requireNetwork(); - AutoRepairUtils.setup(); - StorageService.instance.doAutoRepairSetup(); + UnifiedRepairUtils.setup(); + StorageService.instance.doUnifiedRepairSetup(); 
DatabaseDescriptor.setCDCEnabled(false); } @@ -128,7 +128,7 @@ public void setup() SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); - QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_UNIFIED_REPAIR)); QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); @@ -143,23 +143,23 @@ public void setup() Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).truncateBlocking(); Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).disableAutoCompaction(); - Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); - Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY).truncateBlocking(); - AutoRepair.instance = new AutoRepair(); + UnifiedRepair.instance = new UnifiedRepair(); 
executeCQL(); timeFuncCalls = 0; - AutoRepair.timeFunc = System::currentTimeMillis; + UnifiedRepair.timeFunc = System::currentTimeMillis; resetCounters(); resetConfig(); - AutoRepair.shuffleFunc = java.util.Collections::shuffle; + UnifiedRepair.shuffleFunc = java.util.Collections::shuffle; keyspace = Keyspace.open(KEYSPACE); cfm = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).metadata(); - cfmDisabledAutoRepair = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE_DISABLED_AUTO_REPAIR).metadata(); + cfmDisabledUnifiedRepair = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE_DISABLED_UNIFIED_REPAIR).metadata(); DatabaseDescriptor.setCDCOnRepairEnabled(false); } @@ -171,7 +171,7 @@ public void tearDown() private void resetCounters() { - AutoRepairMetrics metrics = AutoRepairMetricsManager.getMetrics(repairType); + UnifiedRepairMetrics metrics = UnifiedRepairMetricsManager.getMetrics(repairType); Metrics.removeMatching((name, metric) -> name.startsWith("repairTurn")); metrics.repairTurnMyTurn = Metrics.counter(String.format("repairTurnMyTurn-%s", repairType)); metrics.repairTurnMyTurnForceRepair = Metrics.counter(String.format("repairTurnMyTurnForceRepair-%s", repairType)); @@ -181,15 +181,15 @@ private void resetCounters() private void resetConfig() { // prepare a fresh default config - defaultConfig = new AutoRepairConfig(true); - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + defaultConfig = new UnifiedRepairConfig(true); + for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) { - defaultConfig.setAutoRepairEnabled(repairType, true); + defaultConfig.setUnifiedRepairEnabled(repairType, true); defaultConfig.setMVRepairEnabled(repairType, false); } - // reset the AutoRepairService config to default - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + // reset the UnifiedRepairService config to default + UnifiedRepairConfig config = 
UnifiedRepairService.instance.getUnifiedRepairConfig(); config.repair_type_overrides = defaultConfig.repair_type_overrides; config.global_settings = defaultConfig.global_settings; config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; @@ -201,24 +201,24 @@ private void executeCQL() QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, s) VALUES ('k', 's')"); QueryProcessor.executeInternal("SELECT s FROM ks.tbl WHERE k='k'"); Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) - .getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY) + .getColumnFamilyStore(SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY) .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); } @Test(expected = ConfigurationException.class) public void testRepairAsyncWithRepairTypeDisabled() { - AutoRepairService.instance.getAutoRepairConfig().setAutoRepairEnabled(repairType, false); + UnifiedRepairService.instance.getUnifiedRepairConfig().setUnifiedRepairEnabled(repairType, false); - AutoRepair.instance.repairAsync(repairType); + UnifiedRepair.instance.repairAsync(repairType); } @Test public void testRepairAsync() { - AutoRepair.instance.repairExecutors.put(repairType, mockExecutor); + UnifiedRepair.instance.repairExecutors.put(repairType, mockExecutor); - AutoRepair.instance.repairAsync(repairType); + UnifiedRepair.instance.repairAsync(repairType); verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); @@ -228,17 +228,17 @@ public void testRepairAsync() public void testRepairTurn() { UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + Assert.assertTrue("Expected my turn for the repair", UnifiedRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); } @Test public void testRepair() { - 
AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType); - assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - long lastRepairTime = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairMinInterval(repairType, "0s"); + UnifiedRepair.instance.repair(repairType); + assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + long lastRepairTime = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); //if repair was done then lastRepairTime should be non-zero Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual value lastRepairTime %d", lastRepairTime), lastRepairTime > 0); @@ -247,113 +247,113 @@ public void testRepair() @Test public void testTooFrequentRepairs() { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); //in the first round let repair run config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType); - long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); - int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + UnifiedRepair.instance.repair(repairType); + long lastRepairTime1 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + int consideredTables = UnifiedRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); 
Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), consideredTables, 0); //if repair was done in last 24 hours then it should not trigger another repair config.setRepairMinInterval(repairType, "24h"); - AutoRepair.instance.repair(repairType); - long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + UnifiedRepair.instance.repair(repairType); + long lastRepairTime2 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertEquals(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); - assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); } @Test public void testNonFrequentRepairs() { - Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); - AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + Integer prevMetricsCount = UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); - AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType); - long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + 
UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairMinInterval(repairType, "0s"); + UnifiedRepair.instance.repair(repairType); + long lastRepairTime1 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", lastRepairTime1), lastRepairTime1 > 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); Assert.assertTrue("Expected my turn for the repair", - AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); - AutoRepair.instance.repair(repairType); - long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + UnifiedRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + UnifiedRepair.instance.repair(repairType); + long lastRepairTime2 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + "lastRepairTime2 ", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); - assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + assertEquals(prevMetricsCount, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @Test public void testGetPriorityHosts() { - Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); - AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + Integer prevMetricsCount = UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); - 
AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairMinInterval(repairType, "0s"); Assert.assertSame(String.format("Priority host count is not same, actual value %d, expected value %d", - AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0), UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != + Assert.assertTrue("Expected my turn for the repair", UnifiedRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); - AutoRepair.instance.repair(repairType); - AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); + UnifiedRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); + UnifiedRepair.instance.repair(repairType); Assert.assertSame(String.format("Priority host count is not same actual value %d, expected value %d", - AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0), UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); - assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + assertEquals(prevMetricsCount, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @Test - public void testCheckAutoRepairStartStop() throws Throwable + 
public void testCheckUnifiedRepairStartStop() throws Throwable { - Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); - AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + Integer prevMetricsCount = UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); long prevCount = state.getTotalMVTablesConsideredForRepair(); config.setRepairMinInterval(repairType, "0s"); - config.setAutoRepairEnabled(repairType, false); - long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); - AutoRepair.instance.repair(repairType); - long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + config.setUnifiedRepairEnabled(repairType, false); + long lastRepairTime1 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + UnifiedRepair.instance.repair(repairType); + long lastRepairTime2 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); //Since repair has not happened, both the last repair times should be same Assert.assertEquals(String.format("Expected lastRepairTime1 %d, and lastRepairTime2 %d to be same", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); - config.setAutoRepairEnabled(repairType, true); - AutoRepair.instance.repair(repairType); + config.setUnifiedRepairEnabled(repairType, true); + UnifiedRepair.instance.repair(repairType); //since repair is done now, so lastRepairTime1/lastRepairTime2 and lastRepairTime3 should not be same - long lastRepairTime3 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + long lastRepairTime3 = 
UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected lastRepairTime1 %d, and lastRepairTime3 %d to be not same", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime3); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); - assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + assertEquals(prevMetricsCount, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @Test public void testRepairPrimaryRangesByDefault() { Assert.assertTrue("Expected primary range repair only", - AutoRepairService.instance.getAutoRepairConfig().getRepairPrimaryTokenRangeOnly(repairType)); + UnifiedRepairService.instance.getUnifiedRepairConfig().getRepairPrimaryTokenRangeOnly(repairType)); } @Test public void testGetAllMVs() { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setMVRepairEnabled(repairType, false); assertFalse(config.getMVRepairEnabled(repairType)); - assertEquals(0, AutoRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); + assertEquals(0, UnifiedRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); config.setMVRepairEnabled(repairType, true); assertTrue(config.getMVRepairEnabled(repairType)); - assertEquals(Arrays.asList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + assertEquals(Arrays.asList(MV), UnifiedRepairUtils.getAllMVs(repairType, keyspace, cfm)); config.setMVRepairEnabled(repairType, false); } @@ -361,32 +361,32 @@ public void testGetAllMVs() @Test public void testMVRepair() { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setMVRepairEnabled(repairType, true); 
config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - AutoRepair.instance.repair(repairType); - assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + UnifiedRepair.instance.repair(repairType); + assertEquals(1, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, false); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - AutoRepair.instance.repair(repairType); - assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + UnifiedRepair.instance.repair(repairType); + assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, true); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - AutoRepair.instance.repair(repairType); - assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(1, 
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + UnifiedRepair.instance.repair(repairType); + assertEquals(1, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); } @Test public void testSkipRepairSSTableCountHigherThreshold() { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); ColumnFamilyStore cfsBaseTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE); ColumnFamilyStore cfsMVTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV); Set preBaseTable = cfsBaseTable.getLiveSSTables(); @@ -414,89 +414,89 @@ public void testSkipRepairSSTableCountHigherThreshold() config.setMVRepairEnabled(repairType, true); config.setRepairSSTableCountHigherThreshold(repairType, 9); assertEquals(0, state.getSkippedTokenRangesCount()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); state.setLastRepairTime(0); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); assertEquals(0, state.getTotalMVTablesConsideredForRepair()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); // skipping 
both the tables - one table is due to its repair has been disabled, and another one due to high sstable count assertEquals(0, state.getSkippedTokenRangesCount()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); assertEquals(2, state.getSkippedTablesCount()); - assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(2, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); // set it to higher value, and this time, the tables should not be skipped config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); state.setLastRepairTime(0); state.setSkippedTablesCount(0); state.setTotalMVTablesConsideredForRepair(0); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); - assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); assertEquals(0, state.getSkippedTokenRangesCount()); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); assertEquals(1, state.getSkippedTablesCount()); - assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); } @Test public void testGetRepairState() { - assertEquals(0, 
AutoRepair.instance.repairStates.get(repairType).getRepairKeyspaceCount()); + assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getRepairKeyspaceCount()); - AutoRepairState state = AutoRepair.instance.getRepairState(repairType); + UnifiedRepairState state = UnifiedRepair.instance.getRepairState(repairType); state.setRepairKeyspaceCount(100); - assertEquals(100L, AutoRepair.instance.getRepairState(repairType).getRepairKeyspaceCount()); + assertEquals(100L, UnifiedRepair.instance.getRepairState(repairType).getRepairKeyspaceCount()); } @Test public void testMetrics() { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setMVRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); - config.setAutoRepairTableMaxRepairTime(repairType, "0s"); - AutoRepair.timeFunc = () -> { + config.setUnifiedRepairTableMaxRepairTime(repairType, "0s"); + UnifiedRepair.timeFunc = () -> { timeFuncCalls++; return timeFuncCalls * 1000L; }; - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); + UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); - assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); - assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue() > 0); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); + assertEquals(1, 
UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); + assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); + assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); + assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue() > 0); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); - config.setAutoRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE-1) + 's'); - AutoRepair.instance.repairStates.put(repairType, autoRepairState); - when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + config.setUnifiedRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE - 1) + 's'); + UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); + when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); - when(autoRepairState.getFailedTokenRangesCount()).thenReturn(10); - when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); - when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); + when(unifiedRepairState.getFailedTokenRangesCount()).thenReturn(10); + when(unifiedRepairState.getSucceededTokenRangesCount()).thenReturn(11); + when(unifiedRepairState.getLongestUnrepairedSec()).thenReturn(10); - AutoRepair.instance.repair(repairType); - assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); - assertTrue(AutoRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); - 
assertTrue(AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); + UnifiedRepair.instance.repair(repairType); + assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); + assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); + assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); } @Test public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws Exception { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setMVRepairEnabled(repairType, false); config.setRepairRetryBackoff("0s"); - when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); - AutoRepair.instance.repairStates.put(repairType, autoRepairState); - when(autoRepairState.getLastRepairTime()).thenReturn((long) 0); + UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); + when(unifiedRepairState.getLastRepairTime()).thenReturn((long) 0); AtomicInteger resetWaitConditionCalls = new AtomicInteger(); AtomicInteger waitForRepairCompletedCalls = new AtomicInteger(); doAnswer(invocation -> { @@ -504,36 +504,36 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws assertEquals("waitForRepairToComplete was called before resetWaitCondition", resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get() + 1); return null; - }).when(autoRepairState).resetWaitCondition(); + }).when(unifiedRepairState).resetWaitCondition(); doAnswer(invocation -> { 
waitForRepairCompletedCalls.getAndIncrement(); assertEquals("resetWaitCondition was not called before waitForRepairToComplete", resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get()); return null; - }).when(autoRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + }).when(unifiedRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); - AutoRepair.instance.repair(repairType); - AutoRepair.instance.repair(repairType); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); } @Test - public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() + public void testDisabledUnifiedRepairForATableThroughTableLevelConfiguration() { - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); + Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); + Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setRepairMinInterval(repairType, "0s"); - int 
disabledTablesRepairCountBefore = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); - AutoRepair.instance.repair(repairType); - int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + int disabledTablesRepairCountBefore = UnifiedRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + UnifiedRepair.instance.repair(repairType); + int consideredTables = UnifiedRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), consideredTables, 0); - int disabledTablesRepairCountAfter = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); - Assert.assertTrue(String.format("A table %s should be skipped from auto repair, expected value: %d, actual value %d ", TABLE_DISABLED_AUTO_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), + int disabledTablesRepairCountAfter = UnifiedRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + Assert.assertTrue(String.format("A table %s should be skipped from unified repair, expected value: %d, actual value %d ", TABLE_DISABLED_UNIFIED_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); } @@ -544,7 +544,7 @@ public void testTokenRangesNoSplit() assertEquals(1, tokens.size()); List> expectedToken = new ArrayList<>(tokens); - List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, Collections.singletonList(TABLE)); + List assignments = new DefaultUnifiedRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, Collections.singletonList(TABLE)); assertEquals(1, assignments.size()); assertEquals(expectedToken.get(0).left, 
assignments.get(0).getTokenRange().left); assertEquals(expectedToken.get(0).right, assignments.get(0).getTokenRange().right); @@ -558,12 +558,12 @@ public void testTableAttribute() } @Test - public void testDefaultAutomatedRepair() + public void testDefaultUnifiedRepair() { - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); + Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); + Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); } @Test @@ -571,7 +571,7 @@ public void testRepairShufflesKeyspacesAndTables() { AtomicInteger shuffleKeyspacesCall = new AtomicInteger(); AtomicInteger shuffleTablesCall = new AtomicInteger(); - AutoRepair.shuffleFunc = (List list) -> { + UnifiedRepair.shuffleFunc = (List list) -> { if (!list.isEmpty()) { assertTrue(list.get(0) instanceof Keyspace || list.get(0) instanceof String); @@ -587,9 +587,9 @@ else if (list.get(0) instanceof String) } }; - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); assertEquals(1, 
shuffleKeyspacesCall.get()); assertEquals(5, shuffleTablesCall.get()); @@ -598,88 +598,88 @@ else if (list.get(0) instanceof String) @Test public void testRepairTakesLastRepairTimeFromDB() { - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); config.setMVRepairEnabled(repairType, true); long lastRepairTime = System.currentTimeMillis() - 1000; - AutoRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); - AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + UnifiedRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); + UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(0); config.setRepairMinInterval(repairType, "1h"); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); // repair scheduler should not attempt to run repair as last repair time in DB is current time - 1s - assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); + assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); // repair scheduler should load the repair time from the DB - assertEquals(lastRepairTime, AutoRepair.instance.repairStates.get(repairType).getLastRepairTime()); + assertEquals(lastRepairTime, UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime()); } @Test public void testRepairMaxRetries() { - when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(false); - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + when(unifiedRepairState.isSuccess()).thenReturn(false); + UnifiedRepairConfig config = 
UnifiedRepairService.instance.getUnifiedRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); - AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + UnifiedRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); }; config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repairStates.put(repairType, autoRepairState); + UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); //system_auth.role_permissions,system_auth.network_permissions,system_auth.role_members,system_auth.roles, // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, - // system_distributed.auto_repair_priority,system_distributed.repair_history,system_distributed.auto_repair_history, + // system_distributed.unified_repair_priority,system_distributed.repair_history,system_distributed.unified_repair_history, // system_distributed.view_build_status,system_distributed.parent_repair_history,system_distributed.partition_denylist int exptedTablesGoingThroughRepair = 18; assertEquals(config.getRepairMaxRetries()*exptedTablesGoingThroughRepair, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(exptedTablesGoingThroughRepair); + verify(unifiedRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); + verify(unifiedRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(unifiedRepairState, Mockito.times(1)).setFailedTokenRangesCount(exptedTablesGoingThroughRepair); } @Test public void testRepairSuccessAfterRetry() { - when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), 
Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); - AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + UnifiedRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); }; - when(autoRepairState.isSuccess()).then((invocationOnMock) -> { + when(unifiedRepairState.isSuccess()).then((invocationOnMock) -> { if (sleepCalls.get() == 0) { return false; } return true; }); config.setRepairMinInterval(repairType, "0s"); - AutoRepair.instance.repairStates.put(repairType, autoRepairState); - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); + UnifiedRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(18); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(unifiedRepairState, Mockito.times(1)).setSucceededTokenRangesCount(18); + verify(unifiedRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(unifiedRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } @Test public void testRepairThrowsForIRWithMVReplay() { - AutoRepair.instance.setup(); + UnifiedRepair.instance.setup(); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - if (repairType == AutoRepairConfig.RepairType.incremental) + if (repairType == UnifiedRepairConfig.RepairType.incremental) { try { - 
AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); fail("Expected ConfigurationException"); } catch (ConfigurationException ignored) @@ -688,7 +688,7 @@ public void testRepairThrowsForIRWithMVReplay() } else { - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); } } @@ -696,14 +696,14 @@ public void testRepairThrowsForIRWithMVReplay() @Test public void testRepairThrowsForIRWithCDCReplay() { - AutoRepair.instance.setup(); + UnifiedRepair.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(true); - if (repairType == AutoRepairConfig.RepairType.incremental) + if (repairType == UnifiedRepairConfig.RepairType.incremental) { try { - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); fail("Expected ConfigurationException"); } catch (ConfigurationException ignored) @@ -712,7 +712,7 @@ public void testRepairThrowsForIRWithCDCReplay() } else { - AutoRepair.instance.repair(repairType); + UnifiedRepair.instance.repair(repairType); } } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateFactoryTest.java similarity index 76% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateFactoryTest.java index a0e5bdc45294..c1ee56e1065d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateFactoryTest.java @@ -16,24 +16,24 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import org.junit.Test; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -public class AutoRepairStateFactoryTest +public class UnifiedRepairStateFactoryTest { @Test public void testGetRepairState() { - AutoRepairState state = RepairType.getAutoRepairState(RepairType.full); + UnifiedRepairState state = RepairType.getUnifiedRepairState(RepairType.full); assert state instanceof FullRepairState; - state = RepairType.getAutoRepairState(RepairType.incremental); + state = RepairType.getUnifiedRepairState(RepairType.incremental); assert state instanceof IncrementalRepairState; } @@ -42,7 +42,7 @@ public void testGetRepairState() { public void testGetRepairStateSupportsAllRepairTypes() { for (RepairType repairType : RepairType.values()) { try { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); assertNotNull(state); } catch (IllegalArgumentException e) { assertNull(e); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateTest.java similarity index 72% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateTest.java index f0974dd83c1a..df60ffc4e484 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.Arrays; import java.util.Collection; @@ -33,9 +33,9 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.UnifiedRepairHistory; +import org.apache.cassandra.service.UnifiedRepairService; import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.progress.ProgressEvent; import org.apache.cassandra.utils.progress.ProgressEventType; @@ -50,7 +50,7 @@ import static org.mockito.MockitoAnnotations.initMocks; @RunWith(Parameterized.class) -public class AutoRepairStateTest extends CQLTester +public class UnifiedRepairStateTest extends CQLTester { private static final String testTable = "test"; @@ -68,15 +68,15 @@ public static Collection repairTypes() @Before public void setUp() { - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); initMocks(this); createTable(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int)", KEYSPACE, testTable)); } @Test public void testGetRepairRunnable() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - AutoRepairService.setup(); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + UnifiedRepairService.setup(); Runnable runnable = state.getRepairRunnable(KEYSPACE, ImmutableList.of(testTable), ImmutableSet.of(), false); @@ -86,7 +86,7 @@ public void testGetRepairRunnable() { @Test public void 
testProgressError() throws InterruptedException { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); state.progress("test", progressEvent); @@ -98,7 +98,7 @@ public void testProgressError() throws InterruptedException @Test public void testProgress_progress() throws InterruptedException { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.PROGRESS); state.progress("test", progressEvent); @@ -111,7 +111,7 @@ public void testProgress_progress() throws InterruptedException @Test public void testProgress_complete() throws InterruptedException { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.COMPLETE); state.progress("test", progressEvent); @@ -123,7 +123,7 @@ public void testProgress_complete() throws InterruptedException @Test public void testWaitForRepairToComplete() throws Exception { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.condition.signalAll(); Condition finishedCondition = Condition.newOneTimeCondition(); Callable waitForRepairToComplete = () -> { @@ -139,7 +139,7 @@ public void testWaitForRepairToComplete() throws Exception @Test public void testGetLastRepairTime() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.lastRepairTimeInMs = 1; assertEquals(1, state.getLastRepairTime()); @@ -147,7 +147,7 @@ public void testGetLastRepairTime() { @Test public void testSetTotalTablesConsideredForRepair() 
{ - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setTotalTablesConsideredForRepair(1); @@ -156,7 +156,7 @@ public void testSetTotalTablesConsideredForRepair() { @Test public void testGetTotalTablesConsideredForRepair() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.totalTablesConsideredForRepair = 1; assertEquals(1, state.getTotalTablesConsideredForRepair()); @@ -164,7 +164,7 @@ public void testGetTotalTablesConsideredForRepair() { @Test public void testSetLastRepairTimeInMs() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setLastRepairTime(1); @@ -173,7 +173,7 @@ public void testSetLastRepairTimeInMs() { @Test public void testGetClusterRepairTimeInSec() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.clusterRepairTimeInSec = 1; assertEquals(1, state.getClusterRepairTimeInSec()); @@ -181,7 +181,7 @@ public void testGetClusterRepairTimeInSec() { @Test public void testGetNodeRepairTimeInSec() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.nodeRepairTimeInSec = 1; assertEquals(1, state.getNodeRepairTimeInSec()); @@ -189,7 +189,7 @@ public void testGetNodeRepairTimeInSec() { @Test public void testSetRepairInProgress() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setRepairInProgress(true); @@ -198,7 +198,7 @@ public void testSetRepairInProgress() { @Test public void testIsRepairInProgress() { - AutoRepairState state = 
RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.repairInProgress = true; assertTrue(state.isRepairInProgress()); @@ -206,7 +206,7 @@ public void testIsRepairInProgress() { @Test public void testSetSkippedTokenRangesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setSkippedTokenRangesCount(1); @@ -215,7 +215,7 @@ public void testSetSkippedTokenRangesCount() { @Test public void testGetSkippedTokenRangesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.skippedTokenRangesCount = 1; assertEquals(1, state.getSkippedTokenRangesCount()); @@ -223,7 +223,7 @@ public void testGetSkippedTokenRangesCount() { @Test public void testGetLongestUnrepairedSecNull() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.longestUnrepairedNode = null; try @@ -236,10 +236,10 @@ public void testGetLongestUnrepairedSecNull() { @Test public void testGetLongestUnrepairedSec() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.longestUnrepairedNode = new AutoRepairHistory(UUID.randomUUID(), "", 0, 1000, - null, 0, false); - AutoRepairState.timeFunc = () -> 2000L; + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + state.longestUnrepairedNode = new UnifiedRepairHistory(UUID.randomUUID(), "", 0, 1000, + null, 0, false); + UnifiedRepairState.timeFunc = () -> 2000L; try { @@ -251,7 +251,7 @@ public void testGetLongestUnrepairedSec() { @Test public void testSetTotalMVTablesConsideredForRepair() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); 
state.setTotalMVTablesConsideredForRepair(1); @@ -260,7 +260,7 @@ public void testSetTotalMVTablesConsideredForRepair() { @Test public void testGetTotalMVTablesConsideredForRepair() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.totalMVTablesConsideredForRepair = 1; assertEquals(1, state.getTotalMVTablesConsideredForRepair()); @@ -268,7 +268,7 @@ public void testGetTotalMVTablesConsideredForRepair() { @Test public void testSetNodeRepairTimeInSec() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setNodeRepairTimeInSec(1); @@ -277,7 +277,7 @@ public void testSetNodeRepairTimeInSec() { @Test public void testSetClusterRepairTimeInSec() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setClusterRepairTimeInSec(1); @@ -286,7 +286,7 @@ public void testSetClusterRepairTimeInSec() { @Test public void testSetRepairKeyspaceCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setRepairKeyspaceCount(1); @@ -294,7 +294,7 @@ public void testSetRepairKeyspaceCount() { } @Test public void testGetRepairKeyspaceCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.repairKeyspaceCount = 1; assertEquals(1, state.getRepairKeyspaceCount()); @@ -302,8 +302,8 @@ public void testGetRepairKeyspaceCount() { @Test public void testSetLongestUnrepairedNode() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + UnifiedRepairState state = 
RepairType.getUnifiedRepairState(repairType); + UnifiedRepairHistory history = new UnifiedRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); state.setLongestUnrepairedNode(history); @@ -312,7 +312,7 @@ public void testSetLongestUnrepairedNode() { @Test public void testSetSucceededTokenRangesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setSucceededTokenRangesCount(1); @@ -321,7 +321,7 @@ public void testSetSucceededTokenRangesCount() { @Test public void testGetSucceededTokenRangesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.succeededTokenRangesCount = 1; assertEquals(1, state.getSucceededTokenRangesCount()); @@ -329,7 +329,7 @@ public void testGetSucceededTokenRangesCount() { @Test public void testSetFailedTokenRangesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.setFailedTokenRangesCount(1); @@ -338,7 +338,7 @@ public void testSetFailedTokenRangesCount() { @Test public void testGetFailedTokenRangesCount() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.failedTokenRangesCount = 1; assertEquals(1, state.getFailedTokenRangesCount()); @@ -346,7 +346,7 @@ public void testGetFailedTokenRangesCount() { @Test public void isSuccess() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.success = true; assertTrue(state.isSuccess()); @@ -359,7 +359,7 @@ public void isSuccess() { @Test public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesError() throws InterruptedException { - AutoRepairState state = 
RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); state.progress("test", progressEvent); @@ -372,7 +372,7 @@ public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesErro @Test public void testResetWaitCondition() { - AutoRepairState state = RepairType.getAutoRepairState(repairType); + UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); state.condition.signalAll(); assertTrue(state.condition.isSignalled()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairTest.java similarity index 79% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairTest.java index df9f105b615f..4e6e755a09f4 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.HashMap; import java.util.Map; @@ -34,38 +34,38 @@ import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.ReplicationParams; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; import org.apache.cassandra.schema.SchemaTestUtil; -import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.UnifiedRepairService; -import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.setUnifiedRepairEnabled; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class AutoRepairTest extends CQLTester +public class UnifiedRepairTest extends CQLTester { @BeforeClass public static void setupClass() throws Exception { - setAutoRepairEnabled(true); + setUnifiedRepairEnabled(true); requireNetwork(); } @Before public void setup() { - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); - AutoRepairService.setup(); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.full, true); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.incremental, true); + UnifiedRepairService.setup(); } @Test public void testSetup() { - AutoRepair instance = new AutoRepair(); + UnifiedRepair instance = new 
UnifiedRepair(); instance.setup(); assertEquals(RepairType.values().length, instance.repairExecutors.size()); @@ -81,7 +81,7 @@ public void testSetup() @Test public void testSafeGuardSetupCall() { - AutoRepair instance = new AutoRepair(); + UnifiedRepair instance = new UnifiedRepair(); // only one should be setup, and rest should be ignored instance.setup(); @@ -101,22 +101,22 @@ public void testSafeGuardSetupCall() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithCDCReplay() { - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.incremental, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); - AutoRepair instance = new AutoRepair(); + UnifiedRepair instance = new UnifiedRepair(); instance.setup(); } @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithMVReplay() { - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.incremental, true); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - AutoRepair instance = new AutoRepair(); + UnifiedRepair instance = new UnifiedRepair(); instance.setup(); } @@ -141,14 +141,14 @@ public void testCheckNTSreplicationNodeInsideOutsideDC() // case 1 : // node reside in "datacenter1" // keyspace has replica in "datacenter1" - Assert.assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + Assert.assertTrue(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(ks)); } else if (ks.getName().equals(ksname2)) { // case 2 : // node reside in "datacenter1" // keyspace has replica in "datacenter2" - 
Assert.assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + Assert.assertFalse(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(ks)); } } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtilsTest.java similarity index 70% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java rename to test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtilsTest.java index 3629c03041ab..28234f640223 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtilsTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.autorepair; +package org.apache.cassandra.repair.unifiedrepair; import java.util.List; import java.util.Set; @@ -37,9 +37,9 @@ import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Locator; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils.CurrentRepairStatus; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.UnifiedRepairHistory; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.CurrentRepairStatus; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; @@ -51,14 +51,14 @@ import org.mockito.Mock; import org.mockito.MockitoAnnotations; -import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.setUnifiedRepairEnabled; import static 
org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_DELETE_HOSTS; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_FORCE_REPAIR; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_FINISH_TS; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_PRIORITY; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_START_TS; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_TURN; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_DELETE_HOSTS; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_FORCE_REPAIR; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_FINISH_TS; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_PRIORITY; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_START_TS; +import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_TURN; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -66,7 +66,7 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.when; -public class AutoRepairUtilsTest extends CQLTester +public class UnifiedRepairUtilsTest extends CQLTester { static RepairType repairType = RepairType.incremental; static UUID hostId; @@ -82,12 +82,12 @@ public class AutoRepairUtilsTest extends CQLTester public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - setAutoRepairEnabled(true); + setUnifiedRepairEnabled(true); requireNetwork(); defaultSnitch = DatabaseDescriptor.getLocator(); localEndpoint = FBUtilities.getBroadcastAddressAndPort(); hostId = 
StorageService.instance.getHostIdForEndpoint(localEndpoint); - StorageService.instance.doAutoRepairSetup(); + StorageService.instance.doUnifiedRepairSetup(); } @Before @@ -97,14 +97,14 @@ public void setup() QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", "ks")); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", "ks", "tbl")); - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY)); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY)); } @Test @@ -112,14 +112,14 @@ public void testSetForceRepair() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); - AutoRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); + UnifiedRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -129,11 +129,11 @@ public void testSetForceRepair() @Test public void testSetForceRepairNewNode() { - AutoRepairUtils.setForceRepairNewNode(repairType); + UnifiedRepairUtils.setForceRepairNewNode(repairType); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -146,14 +146,14 @@ public void testClearDeleteHosts() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, delete_hosts, delete_hosts_update_time) VALUES ('%s', %s, { %s }, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId, hostId)); - AutoRepairUtils.clearDeleteHosts(repairType, hostId); + UnifiedRepairUtils.clearDeleteHosts(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT delete_hosts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -162,23 +162,23 @@ public void testClearDeleteHosts() } @Test - public void testGetAutoRepairHistoryForLocalGroup() + public void 
testGetUnifiedRepairHistoryForLocalGroup() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); - List history = AutoRepairUtils.getAutoRepairHistory(repairType); + List history = UnifiedRepairUtils.getUnifiedRepairHistory(repairType); assertNotNull(history); assertEquals(1, history.size()); assertEquals(hostId, history.get(0).hostId); } @Test - public void testGetAutoRepairHistoryForLocalGroup_empty_history() + public void testGetUnifiedRepairHistoryForLocalGroup_empty_history() { - List history = AutoRepairUtils.getAutoRepairHistory(repairType); + List history = UnifiedRepairUtils.getUnifiedRepairHistory(repairType); assertNull(history); } @@ -190,22 +190,22 @@ public void testGetCurrentRepairStatus() UUID regularRepair = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair, repair_start_ts) VALUES ('%s', %s, true, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), forceRepair)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts) VALUES ('%s', %s, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), regularRepair)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, repairType.toString(), regularRepair)); - CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType); + CurrentRepairStatus status = UnifiedRepairUtils.getCurrentRepairStatus(repairType); assertNotNull(status); assertEquals(1, status.historiesWithoutOnGoingRepair.size()); @@ -221,7 +221,7 @@ public void testGetCurrentRepairStatus() @Test public void testGetHostIdsInCurrentRing() { - TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType); + TreeSet hosts = UnifiedRepairUtils.getHostIdsInCurrentRing(repairType); assertNotNull(hosts); assertEquals(1, hosts.size()); @@ -233,13 +233,13 @@ public void testGetHostIdsInCurrentRing_multiple_nodes() { InetAddressAndPort ignoredEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); InetAddressAndPort deadEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 2); - DatabaseDescriptor.getAutoRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); + DatabaseDescriptor.getUnifiedRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); //DatabaseDescriptor.setEndpointSnitch(snitchMock); when(snitchMock.location(localEndpoint)).thenReturn(new Location("dc1", "rac1")); when(snitchMock.location(ignoredEndpoint)).thenReturn(new Location("dc2", "rac1")); when(snitchMock.location(deadEndpoint)).thenReturn(new Location("dc1", "rac1")); - TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new 
NodeAddresses(deadEndpoint))); + TreeSet hosts = UnifiedRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new NodeAddresses(deadEndpoint))); assertNotNull(hosts); assertEquals(1, hosts.size()); @@ -252,93 +252,93 @@ public void testGetHostWithLongestUnrepairTime() UUID otherHostId = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, repair_finish_ts) VALUES ('%s', %s, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), otherHostId)); - AutoRepairHistory history = AutoRepairUtils.getHostWithLongestUnrepairTime(repairType); + UnifiedRepairHistory history = UnifiedRepairUtils.getHostWithLongestUnrepairTime(repairType); assertEquals(hostId, history.hostId); } @Test - public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() + public void testGetMaxNumberOfNodeRunUnifiedRepairInGroup_0_group_size() { - DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); + DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairCount(repairType, 2); - int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 0); + int count = UnifiedRepairUtils.getMaxNumberOfNodeRunUnifiedRepair(repairType, 0); assertEquals(2, count); } @Test - public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() + public void testGetMaxNumberOfNodeRunUnifiedRepairInGroup_percentage() { - 
DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); - DatabaseDescriptor.getAutoRepairConfig().setParallelRepairPercentage(repairType, 50); + DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairCount(repairType, 2); + DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairPercentage(repairType, 50); - int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 10); + int count = UnifiedRepairUtils.getMaxNumberOfNodeRunUnifiedRepair(repairType, 10); assertEquals(5, count); } @Test - public void testDeleteAutoRepairHistory() + public void testDeleteUnifiedRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); - AutoRepairUtils.deleteAutoRepairHistory(repairType, hostId); + UnifiedRepairUtils.deleteUnifiedRepairHistory(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(0, result.size()); } @Test - public void testUpdateStartAutoRepairHistory() + public void testUpdateStartUnifiedRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); - 
AutoRepairUtils.updateStartAutoRepairHistory(repairType, hostId, 123, AutoRepairUtils.RepairTurn.MY_TURN); + UnifiedRepairUtils.updateStartUnifiedRepairHistory(repairType, hostId, 123, UnifiedRepairUtils.RepairTurn.MY_TURN); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT repair_start_ts, repair_turn FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); UntypedResultSet.Row row = result.one(); assertEquals(123, row.getLong(COL_REPAIR_START_TS, 0)); - assertEquals(AutoRepairUtils.RepairTurn.MY_TURN.toString(), row.getString(COL_REPAIR_TURN)); + assertEquals(UnifiedRepairUtils.RepairTurn.MY_TURN.toString(), row.getString(COL_REPAIR_TURN)); } @Test - public void testUpdateFinishAutoRepairHistory() + public void testUpdateFinishUnifiedRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); - AutoRepairUtils.updateFinishAutoRepairHistory(repairType, hostId, 123); + UnifiedRepairUtils.updateFinishUnifiedRepairHistory(repairType, hostId, 123); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT repair_finish_ts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -351,14 +351,14 @@ 
public void testAddHostIdToDeleteHosts() UUID otherHostId = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), otherHostId)); - AutoRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); + UnifiedRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType.toString(), otherHostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -371,11 +371,11 @@ public void testAddHostIdToDeleteHosts() @Test public void testAddPriorityHost() { - AutoRepairUtils.addPriorityHosts(repairType, ImmutableSet.of(localEndpoint)); + UnifiedRepairUtils.addPriorityHosts(repairType, ImmutableSet.of(localEndpoint)); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, repairType.toString())); assertNotNull(result); assertEquals(1, result.size()); @@ -390,14 +390,14 @@ public void testRemovePriorityStatus() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, repairType.toString(), hostId)); - AutoRepairUtils.removePriorityStatus(repairType, hostId); + UnifiedRepairUtils.removePriorityStatus(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, repairType.toString())); assertNotNull(result); assertEquals(1, result.size()); @@ -410,10 +410,10 @@ public void testGetPriorityHosts() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, repairType.toString(), hostId)); - Set hosts = AutoRepairUtils.getPriorityHosts(repairType); + Set hosts = UnifiedRepairUtils.getPriorityHosts(repairType); assertNotNull(hosts); assertEquals(1, hosts.size()); @@ -425,29 +425,29 @@ public void testCheckNodeContainsKeyspaceReplica() { Keyspace ks = Keyspace.open("ks"); - assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + assertTrue(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(ks)); } @Test public void testTableMaxRepairTimeExceeded() { - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairTableMaxRepairTime(repairType, "0s"); - assertTrue(AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, 0)); + assertTrue(UnifiedRepairUtils.tableMaxRepairTimeExceeded(repairType, 0)); } @Test public void testKeyspaceMaxRepairTimeExceeded() { - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); + 
DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairTableMaxRepairTime(repairType, "0s"); - assertTrue(AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, 0, 1)); + assertTrue(UnifiedRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, 0, 1)); } @Test public void testGetLastRepairFinishTime() { - AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + UnifiedRepairHistory history = new UnifiedRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); assertEquals(0, history.getLastRepairFinishTime()); @@ -461,21 +461,21 @@ public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() { UUID myID = UUID.randomUUID(); UUID otherID = UUID.randomUUID(); - DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 5); + DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairCount(repairType, 5); long currentMillis = System.currentTimeMillis(); // finish time less than start time means that repair is ongoing - AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + UnifiedRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); // finish time is larger than start time means that repair for other node is finished - AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + UnifiedRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); - assertEquals(AutoRepairUtils.RepairTurn.MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myID)); + assertEquals(UnifiedRepairUtils.RepairTurn.MY_TURN, UnifiedRepairUtils.myTurnToRunRepair(repairType, myID)); } @Test public void testLocalStrategyAndNetworkKeyspace() { - assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); - assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); + 
assertFalse(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); + assertTrue(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); } @Test @@ -484,10 +484,10 @@ public void testGetLastRepairTimeForNode() UUID myID = UUID.randomUUID(); UUID otherID = UUID.randomUUID(); long currentMillis = System.currentTimeMillis(); - AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); - AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + UnifiedRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + UnifiedRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); - assertEquals(currentMillis - 100, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); + assertEquals(currentMillis - 100, UnifiedRepairUtils.getLastRepairTimeForNode(repairType, myID)); } @Test @@ -495,6 +495,6 @@ public void testGetLastRepairTimeForNodeWhenHistoryIsEmpty() { UUID myID = UUID.randomUUID(); - assertEquals(0, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); + assertEquals(0, UnifiedRepairUtils.getLastRepairTimeForNode(repairType, myID)); } } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java deleted file mode 100644 index 054f136dad75..000000000000 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service; - -import org.junit.Before; -import org.junit.Test; - -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; - -import static org.junit.Assert.assertEquals; - -public class AutoRepairServiceBasicTest extends CQLTester { - private static AutoRepairService autoRepairService; - private static AutoRepairConfig config; - - @Before - public void setUp() { - DatabaseDescriptor.setCDCOnRepairEnabled(false); - DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - DatabaseDescriptor.setMaterializedViewsEnabled(false); - DatabaseDescriptor.setCDCEnabled(false); - config = new AutoRepairConfig(); - autoRepairService = new AutoRepairService(); - autoRepairService.config = config; - } - - @Test - public void testSetup() { - AutoRepairService.instance.config = null; - - AutoRepairService.setup(); - - assertEquals(DatabaseDescriptor.getAutoRepairConfig(), AutoRepairService.instance.config); - } - - @Test - public void testGetAutoRepairConfigReturnsConfig() { - assertEquals(config, autoRepairService.getAutoRepairConfig()); - } - - @Test - public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { - autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); - - assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); - } - - - @Test - public void 
testsetAutoRepairMaxRetriesCount() { - autoRepairService.setAutoRepairMaxRetriesCount(101); - - assertEquals(101, config.getRepairMaxRetries()); - } - - - @Test - public void testsetAutoRepairRetryBackoffInSec() { - autoRepairService.setAutoRepairRetryBackoff("102s"); - - assertEquals(102, config.getRepairRetryBackoff().toSeconds()); - } - - @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { - autoRepairService.config = new AutoRepairConfig(false); - - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { - autoRepairService.config = new AutoRepairConfig(true); - DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { - autoRepairService.config = new AutoRepairConfig(true); - DatabaseDescriptor.setMaterializedViewsEnabled(true); - DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { - autoRepairService.config = new AutoRepairConfig(true); - DatabaseDescriptor.setCDCOnRepairEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } - - @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { - autoRepairService.config = new AutoRepairConfig(true); - DatabaseDescriptor.setCDCEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - } -} diff --git a/test/unit/org/apache/cassandra/service/UnifiedRepairServiceBasicTest.java 
b/test/unit/org/apache/cassandra/service/UnifiedRepairServiceBasicTest.java new file mode 100644 index 000000000000..dc7c91d0edef --- /dev/null +++ b/test/unit/org/apache/cassandra/service/UnifiedRepairServiceBasicTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.service; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; + +import static org.junit.Assert.assertEquals; + +public class UnifiedRepairServiceBasicTest extends CQLTester { + private static UnifiedRepairService unifiedRepairService; + private static UnifiedRepairConfig config; + + @Before + public void setUp() { + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + config = new UnifiedRepairConfig(); + unifiedRepairService = new UnifiedRepairService(); + unifiedRepairService.config = config; + } + + @Test + public void testSetup() { + UnifiedRepairService.instance.config = null; + + UnifiedRepairService.setup(); + + assertEquals(DatabaseDescriptor.getUnifiedRepairConfig(), UnifiedRepairService.instance.config); + } + + @Test + public void testGetUnifiedRepairConfigReturnsConfig() { + assertEquals(config, unifiedRepairService.getUnifiedRepairConfig()); + } + + @Test + public void testsetUnifiedRepairHistoryClearDeleteHostsBufferInSecV2() { + unifiedRepairService.setUnifiedRepairHistoryClearDeleteHostsBufferDuration("100s"); + + assertEquals(100, config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); + } + + + @Test + public void testsetUnifiedRepairMaxRetriesCount() { + unifiedRepairService.setUnifiedRepairMaxRetriesCount(101); + + assertEquals(101, config.getRepairMaxRetries()); + } + + + @Test + public void testsetUnifiedRepairRetryBackoffInSec() { + unifiedRepairService.setUnifiedRepairRetryBackoff("102s"); + + assertEquals(102, config.getRepairRetryBackoff().toSeconds()); + } + + @Test(expected = 
ConfigurationException.class) + public void testSetUnifiedRepairEnabledThrowsWithSchedulerDisabled() { + unifiedRepairService.config = new UnifiedRepairConfig(false); + + unifiedRepairService.setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetUnifiedRepairEnabledThrowsForIRWithMVReplay() { + unifiedRepairService.config = new UnifiedRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + unifiedRepairService.setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetUnifiedRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { + unifiedRepairService.config = new UnifiedRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + unifiedRepairService.setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetUnifiedRepairEnabledThrowsForIRWithCDCReplay() { + unifiedRepairService.config = new UnifiedRepairConfig(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + unifiedRepairService.setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetUnifiedRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { + unifiedRepairService.config = new UnifiedRepairConfig(true); + DatabaseDescriptor.setCDCEnabled(true); + unifiedRepairService.setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/UnifiedRepairServiceRepairTypeTest.java similarity index 72% rename from test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java rename to 
test/unit/org/apache/cassandra/service/UnifiedRepairServiceRepairTypeTest.java index 7c8645149adc..d91c89ee8619 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/UnifiedRepairServiceRepairTypeTest.java @@ -20,8 +20,8 @@ import com.google.common.collect.ImmutableSet; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -33,44 +33,44 @@ import java.util.Set; import java.util.UUID; -import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.setUnifiedRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) -public class AutoRepairServiceRepairTypeTest extends CQLTester { +public class UnifiedRepairServiceRepairTypeTest extends CQLTester { @Parameterized.Parameter() - public AutoRepairConfig.RepairType repairType; + public UnifiedRepairConfig.RepairType repairType; private final UUID host1 = UUID.fromString("00000000-0000-0000-0000-000000000001"); private final UUID host2 = UUID.fromString("00000000-0000-0000-0000-000000000002"); - private AutoRepairService instance; + private UnifiedRepairService instance; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() { - return Arrays.asList(AutoRepairConfig.RepairType.values()); + public static Collection repairTypes() { + return Arrays.asList(UnifiedRepairConfig.RepairType.values()); } @BeforeClass public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - 
setAutoRepairEnabled(true); + setUnifiedRepairEnabled(true); requireNetwork(); } @Before public void setUpTest() { - AutoRepairUtils.setup(); - instance = new AutoRepairService(); + UnifiedRepairUtils.setup(); + instance = new UnifiedRepairService(); } @Test public void testGetOnGoingRepairHostIdsTest() { long now = System.currentTimeMillis(); - AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); - AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); + UnifiedRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); + UnifiedRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); Set hosts = instance.getOnGoingRepairHostIds(repairType); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/UnifiedRepairServiceSetterTest.java similarity index 59% rename from test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java rename to test/unit/org/apache/cassandra/service/UnifiedRepairServiceSetterTest.java index f34e1f0a7071..57b5500b665e 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/UnifiedRepairServiceSetterTest.java @@ -24,8 +24,8 @@ import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.junit.Before; @@ -43,48 +43,48 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static 
org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.setUnifiedRepairEnabled; import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) -public class AutoRepairServiceSetterTest extends CQLTester { - private static final AutoRepairConfig config = new AutoRepairConfig(true); +public class UnifiedRepairServiceSetterTest extends CQLTester { + private static final UnifiedRepairConfig config = new UnifiedRepairConfig(true); @Parameterized.Parameter - public AutoRepairConfig.RepairType repairType; + public UnifiedRepairConfig.RepairType repairType; @Parameterized.Parameter(1) public T arg; @Parameterized.Parameter(2) - public BiConsumer setter; + public BiConsumer setter; @Parameterized.Parameter(3) - public Function getter; + public Function getter; @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") public static Collection testCases() { DatabaseDescriptor.setConfig(DatabaseDescriptor.loadConfig()); return Stream.of( - forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), - forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), - forEachRepairType(200, AutoRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), - forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), - forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), - forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), - forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), - forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, 
config::getMVRepairEnabled), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, AutoRepairServiceSetterTest::isLocalHostForceRepair) + forEachRepairType(true, UnifiedRepairService.instance::setUnifiedRepairEnabled, config::isUnifiedRepairEnabled), + forEachRepairType(100, UnifiedRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(200, UnifiedRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), + forEachRepairType(400, UnifiedRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), UnifiedRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, UnifiedRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, UnifiedRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), + forEachRepairType(700, UnifiedRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), + forEachRepairType(true, UnifiedRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), UnifiedRepairService.instance::setRepairPriorityForHosts, UnifiedRepairUtils::getPriorityHosts), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), UnifiedRepairService.instance::setForceRepairForHosts, UnifiedRepairServiceSetterTest::isLocalHostForceRepair) ).flatMap(Function.identity()).collect(Collectors.toList()); } - private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) { + private static Set isLocalHostForceRepair(UnifiedRepairConfig.RepairType type) { 
UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( - "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, hostId, type)); if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) { return ImmutableSet.of(InetAddressAndPort.getLocalHost()); @@ -92,9 +92,9 @@ private static Set isLocalHostForceRepair(AutoRepairConfig.R return ImmutableSet.of(); } - private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { - Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { + Object[][] testCases = new Object[UnifiedRepairConfig.RepairType.values().length][4]; + for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) { testCases[repairType.ordinal()] = new Object[]{repairType, arg, setter, getter}; } @@ -104,22 +104,22 @@ private static Stream forEachRepairType(T arg, BiConsumer verifyFunc; + public Consumer verifyFunc; @Parameterized.Parameters(name = "repairType={0},paramType={1}") public static Collection testCases() { return Stream.of( - forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type, true)), + forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setUnifiedRepairEnabled(type, true)), forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type, 1)), 
forEachRepairType("number_of_subranges", "2", (type) -> verify(probe, times(1)).setRepairSubRangeNum(type, 2)), forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type, "3h")), forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type, 4)), - forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type, "5s")), + forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setUnifiedRepairTableMaxRepairTime(type, "5s")), forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type, true)), forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type, 6)), forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type, 7)), forEachRepairType("mv_repair_enabled", "true", (type) -> verify(probe, times(1)).setMVRepairEnabled(type, true)), - forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) + forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setUnifiedRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) ).flatMap(Function.identity()).collect(Collectors.toList()); } - private static Stream forEachRepairType(String paramType, String paramVal, Consumer verifyFunc) + private static Stream forEachRepairType(String paramType, String paramVal, Consumer verifyFunc) { - Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + Object[][] testCases = new Object[UnifiedRepairConfig.RepairType.values().length][4]; + for (UnifiedRepairConfig.RepairType repairType : 
UnifiedRepairConfig.RepairType.values()) { testCases[repairType.ordinal()] = new Object[]{ repairType, paramType, paramVal, verifyFunc }; } @@ -316,12 +316,12 @@ public void test() verifyFunc.accept(repairType); - // test scenario when auto repair is disabled - when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + // test scenario when unified repair is disabled + when(probe.getUnifiedRepairConfig()).thenReturn(new UnifiedRepairConfig(false)); cmd.execute(probe); - // test new calls are not made when auto repair is disabled + // test new calls are not made when unified repair is disabled verifyFunc.accept(repairType); } } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/UnifiedRepairStatusTest.java similarity index 76% rename from test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java rename to test/unit/org/apache/cassandra/tools/nodetool/UnifiedRepairStatusTest.java index 16b12ee2350e..a84e24a3a9ae 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/UnifiedRepairStatusTest.java @@ -30,33 +30,33 @@ import org.junit.runners.Parameterized; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.Output; import org.mockito.Mock; import org.mockito.MockitoAnnotations; -import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.Util.setUnifiedRepairEnabled; import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.when; @RunWith(Parameterized.class) -public class AutoRepairStatusTest +public class UnifiedRepairStatusTest { @Mock private static NodeProbe probe; private ByteArrayOutputStream 
cmdOutput; - private static AutoRepairStatus cmd; + private static UnifiedRepairStatus cmd; @Parameterized.Parameter() - public AutoRepairConfig.RepairType repairType; + public UnifiedRepairConfig.RepairType repairType; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + public static Collection repairTypes() { - return Arrays.asList(AutoRepairConfig.RepairType.values()); + return Arrays.asList(UnifiedRepairConfig.RepairType.values()); } @Before @@ -66,13 +66,13 @@ public void setUp() throws Exception cmdOutput = new ByteArrayOutputStream(); PrintStream out = new PrintStream(cmdOutput); when(probe.output()).thenReturn(new Output(out, out)); - cmd = new AutoRepairStatus(); + cmd = new UnifiedRepairStatus(); DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.loadConfig(); - setAutoRepairEnabled(true); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.full, true); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); - when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); + setUnifiedRepairEnabled(true); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.full, true); + DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); + when(probe.getUnifiedRepairConfig()).thenReturn(DatabaseDescriptor.getUnifiedRepairConfig()); } @Test(expected = IllegalArgumentException.class) From 510ce5fa915b8ebd00aad1a3fac1c58fdba55b6d Mon Sep 17 00:00:00 2001 From: Jaydeepkumar Chovatia Date: Thu, 21 Nov 2024 09:17:30 -0800 Subject: [PATCH 057/257] Revert "Rename AutoRepair --> UnifiedRepair" This reverts commit f9f6971336c26e1bcae4110f5e0926b3e92b4565. 
--- .../pages/managing/operating/metrics.adoc | 14 +- .../org/apache/cassandra/config/Config.java | 4 +- .../cassandra/config/DatabaseDescriptor.java | 6 +- .../statements/schema/TableAttributes.java | 8 +- ...airMetrics.java => AutoRepairMetrics.java} | 44 +- ...ger.java => AutoRepairMetricsManager.java} | 10 +- .../metrics/CassandraMetricsRegistry.java | 2 +- .../AutoRepair.java} | 120 +++--- .../AutoRepairConfig.java} | 46 +- .../AutoRepairState.java} | 38 +- .../AutoRepairUtils.java} | 186 ++++----- .../DefaultAutoRepairTokenSplitter.java} | 12 +- .../IAutoRepairTokenRangeSplitter.java} | 10 +- ...epairParams.java => AutoRepairParams.java} | 30 +- .../cassandra/schema/SchemaKeyspace.java | 10 +- .../schema/SystemDistributedKeyspace.java | 44 +- .../apache/cassandra/schema/TableParams.java | 40 +- ...airService.java => AutoRepairService.java} | 50 +-- ...MBean.java => AutoRepairServiceMBean.java} | 20 +- .../cassandra/service/CassandraDaemon.java | 2 +- .../cassandra/service/StorageService.java | 14 +- .../tcm/sequences/BootstrapAndJoin.java | 4 +- .../tcm/sequences/BootstrapAndReplace.java | 4 +- .../tcm/sequences/ReplaceSameAddress.java | 4 +- .../org/apache/cassandra/tools/NodeProbe.java | 94 ++--- .../org/apache/cassandra/tools/NodeTool.java | 6 +- ...epairStatus.java => AutoRepairStatus.java} | 14 +- ...irConfig.java => GetAutoRepairConfig.java} | 22 +- ...irConfig.java => SetAutoRepairConfig.java} | 28 +- .../apache/cassandra/utils/FBUtilities.java | 12 +- ...Test.java => AutoRepairSchedulerTest.java} | 34 +- test/unit/org/apache/cassandra/Util.java | 8 +- .../config/DatabaseDescriptorRefTest.java | 16 +- .../config/YamlConfigurationLoaderTest.java | 14 +- .../AutoRepairConfigTest.java} | 78 ++-- ...efaultTokenSplitterParameterizedTest.java} | 28 +- .../AutoRepairKeyspaceTest.java} | 10 +- .../AutoRepairParameterizedTest.java} | 394 +++++++++--------- .../AutoRepairStateFactoryTest.java} | 12 +- .../AutoRepairStateTest.java} | 84 ++-- 
.../AutoRepairTest.java} | 36 +- .../AutoRepairUtilsTest.java} | 174 ++++---- .../SSTableRepairedAtTest.java | 6 +- .../service/AutoRepairServiceBasicTest.java | 118 ++++++ ...a => AutoRepairServiceRepairTypeTest.java} | 26 +- ....java => AutoRepairServiceSetterTest.java} | 60 +-- .../UnifiedRepairServiceBasicTest.java | 118 ------ .../cassandra/tools/JMXStandardsTest.java | 6 +- ...tusTest.java => AutoRepairStatusTest.java} | 24 +- ...Test.java => SetAutoRepairConfigTest.java} | 64 +-- 50 files changed, 1104 insertions(+), 1104 deletions(-) rename src/java/org/apache/cassandra/metrics/{UnifiedRepairMetrics.java => AutoRepairMetrics.java} (76%) rename src/java/org/apache/cassandra/metrics/{UnifiedRepairMetricsManager.java => AutoRepairMetricsManager.java} (70%) rename src/java/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepair.java => autorepair/AutoRepair.java} (78%) rename src/java/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairConfig.java => autorepair/AutoRepairConfig.java} (91%) rename src/java/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairState.java => autorepair/AutoRepairState.java} (88%) rename src/java/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairUtils.java => autorepair/AutoRepairUtils.java} (83%) rename src/java/org/apache/cassandra/repair/{unifiedrepair/DefaultUnifiedRepairTokenSplitter.java => autorepair/DefaultAutoRepairTokenSplitter.java} (82%) rename src/java/org/apache/cassandra/repair/{unifiedrepair/IUnifiedRepairTokenRangeSplitter.java => autorepair/IAutoRepairTokenRangeSplitter.java} (87%) rename src/java/org/apache/cassandra/schema/{UnifiedRepairParams.java => AutoRepairParams.java} (74%) rename src/java/org/apache/cassandra/service/{UnifiedRepairService.java => AutoRepairService.java} (72%) rename src/java/org/apache/cassandra/service/{UnifiedRepairServiceMBean.java => AutoRepairServiceMBean.java} (74%) rename src/java/org/apache/cassandra/tools/nodetool/{UnifiedRepairStatus.java => AutoRepairStatus.java} 
(82%) rename src/java/org/apache/cassandra/tools/nodetool/{GetUnifiedRepairConfig.java => GetAutoRepairConfig.java} (82%) rename src/java/org/apache/cassandra/tools/nodetool/{SetUnifiedRepairConfig.java => SetAutoRepairConfig.java} (83%) rename test/distributed/org/apache/cassandra/distributed/test/repair/{UnifiedRepairSchedulerTest.java => AutoRepairSchedulerTest.java} (83%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairConfigTest.java => autorepair/AutoRepairConfigTest.java} (77%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java => autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java} (84%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairKeyspaceTest.java => autorepair/AutoRepairKeyspaceTest.java} (87%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairParameterizedTest.java => autorepair/AutoRepairParameterizedTest.java} (52%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairStateFactoryTest.java => autorepair/AutoRepairStateFactoryTest.java} (76%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairStateTest.java => autorepair/AutoRepairStateTest.java} (72%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairTest.java => autorepair/AutoRepairTest.java} (79%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair/UnifiedRepairUtilsTest.java => autorepair/AutoRepairUtilsTest.java} (70%) rename test/unit/org/apache/cassandra/repair/{unifiedrepair => autorepair}/SSTableRepairedAtTest.java (97%) create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java rename test/unit/org/apache/cassandra/service/{UnifiedRepairServiceRepairTypeTest.java => AutoRepairServiceRepairTypeTest.java} (72%) rename test/unit/org/apache/cassandra/service/{UnifiedRepairServiceSetterTest.java => AutoRepairServiceSetterTest.java} (59%) delete 
mode 100644 test/unit/org/apache/cassandra/service/UnifiedRepairServiceBasicTest.java rename test/unit/org/apache/cassandra/tools/nodetool/{UnifiedRepairStatusTest.java => AutoRepairStatusTest.java} (76%) rename test/unit/org/apache/cassandra/tools/nodetool/{SetUnifiedRepairConfigTest.java => SetAutoRepairConfigTest.java} (79%) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 594c31078d59..2b3e2b75efb4 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1081,16 +1081,16 @@ partitions processed per logged batch partitions processed per unlogged batch |=== -== Unified Repair Metrics +== Automated Repair Metrics -Metrics specifc to unified repair. +Metrics specifc to automated repair. Reported name format: *Metric Name*:: -`org.apache.cassandra.metrics.UnifiedRepair.` +`org.apache.cassandra.metrics.AutoRepair.` *JMX MBean*:: -`org.apache.cassandra.metrics:type=UnifiedRepair name= repairType=` +`org.apache.cassandra.metrics:type=AutoRepair name= repairType=` [cols=",,",options="header",] |=== @@ -1121,15 +1121,15 @@ on the node views considered on the node |TotalDisabledRepairTables |Gauge |Number of tables on which -the unified repair has been disabled on the node +the automated repair has been disabled on the node |RepairTurnMyTurn |Counter |Represents the node's turn to repair |RepairTurnMyTurnDueToPriority |Counter |Represents the node's turn to repair -due to priority set in the unified repair +due to priority set in the automated repair |RepairTurnMyTurnForceRepair |Counter |Represents the node's turn to repair -due to force repair set in the unified repair +due to force repair set in the automated repair |=== diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index e7bbe79f385b..9a0c45622167 100644 --- 
a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -33,7 +33,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1001,7 +1001,7 @@ public static void setClientMode(boolean clientMode) public volatile boolean password_validator_reconfiguration_enabled = true; public volatile CustomGuardrailConfig password_validator = new CustomGuardrailConfig(); - public volatile UnifiedRepairConfig unified_repair = new UnifiedRepairConfig(); + public volatile AutoRepairConfig auto_repair = new AutoRepairConfig(); /** * The variants of paxos implementation and semantics supported by Cassandra. diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index d7bdd12bf5ac..de3e13099b21 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -114,7 +114,7 @@ import org.apache.cassandra.locator.ReconnectableSnitchHelper; import org.apache.cassandra.locator.SeedProvider; import org.apache.cassandra.locator.SnitchAdapter; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.security.AbstractCryptoProvider; import org.apache.cassandra.security.EncryptionContext; import org.apache.cassandra.security.JREProvider; @@ -5912,9 +5912,9 @@ public static boolean getAccordEphemeralReadEnabledEnabled() return conf.accord.ephemeralReadEnabled; } - public static UnifiedRepairConfig getUnifiedRepairConfig() + public static AutoRepairConfig getAutoRepairConfig() { - return conf.unified_repair; + return conf.auto_repair; } public static double 
getIncrementalRepairDiskHeadroomRejectRatio() diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 2eb78f59b153..c8cfca9731ac 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -27,8 +27,8 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.schema.UnifiedRepairParams; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.schema.AutoRepairParams; import org.apache.cassandra.schema.CachingParams; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.CompressionParams; @@ -199,10 +199,10 @@ private TableParams build(TableParams.Builder builder) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); if (hasOption(Option.REPAIR_FULL)) - builder.unifiedRepairFull(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.full, getMap(Option.REPAIR_FULL))); + builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, getMap(Option.REPAIR_FULL))); if (hasOption(Option.REPAIR_INCREMENTAL)) - builder.unifiedRepairIncremental(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); + builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); return builder.build(); } diff --git a/src/java/org/apache/cassandra/metrics/UnifiedRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java similarity 
index 76% rename from src/java/org/apache/cassandra/metrics/UnifiedRepairMetrics.java rename to src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index d4929cbfefb0..b097dd3414c4 100644 --- a/src/java/org/apache/cassandra/metrics/UnifiedRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -20,19 +20,19 @@ import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepair; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** - * Metrics related to UnifiedRepair. + * Metrics related to AutoRepair. 
*/ -public class UnifiedRepairMetrics +public class AutoRepairMetrics { - public static final String TYPE_NAME = "unifiedrepair"; + public static final String TYPE_NAME = "autorepair"; public Gauge repairsInProgress; public Gauge nodeRepairTimeInSec; public Gauge clusterRepairTimeInSec; @@ -47,15 +47,15 @@ public class UnifiedRepairMetrics public Gauge totalMVTablesConsideredForRepair; public Gauge totalDisabledRepairTables; - public UnifiedRepairMetrics(RepairType repairType) + public AutoRepairMetrics(RepairType repairType) { - UnifiedRepairMetricsFactory factory = new UnifiedRepairMetricsFactory(repairType); + AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); repairsInProgress = Metrics.register(factory.createMetricName("RepairsInProgress"), new Gauge() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).isRepairInProgress() ? 1 : 0; + return AutoRepair.instance.getRepairState(repairType).isRepairInProgress() ? 1 : 0; } }); @@ -63,7 +63,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getNodeRepairTimeInSec(); + return AutoRepair.instance.getRepairState(repairType).getNodeRepairTimeInSec(); } }); @@ -71,7 +71,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getClusterRepairTimeInSec(); + return AutoRepair.instance.getRepairState(repairType).getClusterRepairTimeInSec(); } }); @@ -79,7 +79,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getSkippedTokenRangesCount(); + return AutoRepair.instance.getRepairState(repairType).getSkippedTokenRangesCount(); } }); @@ -87,7 +87,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getSkippedTablesCount(); + return AutoRepair.instance.getRepairState(repairType).getSkippedTablesCount(); 
} }); @@ -96,7 +96,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getLongestUnrepairedSec(); + return AutoRepair.instance.getRepairState(repairType).getLongestUnrepairedSec(); } }); @@ -104,7 +104,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getSucceededTokenRangesCount(); + return AutoRepair.instance.getRepairState(repairType).getSucceededTokenRangesCount(); } }); @@ -112,7 +112,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getFailedTokenRangesCount(); + return AutoRepair.instance.getRepairState(repairType).getFailedTokenRangesCount(); } }); @@ -124,7 +124,7 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getTotalMVTablesConsideredForRepair(); + return AutoRepair.instance.getRepairState(repairType).getTotalMVTablesConsideredForRepair(); } }); @@ -132,12 +132,12 @@ public Integer getValue() { public Integer getValue() { - return UnifiedRepair.instance.getRepairState(repairType).getTotalDisabledTablesRepairCount(); + return AutoRepair.instance.getRepairState(repairType).getTotalDisabledTablesRepairCount(); } }); } - public void recordTurn(UnifiedRepairUtils.RepairTurn turn) + public void recordTurn(AutoRepairUtils.RepairTurn turn) { switch (turn) { @@ -156,13 +156,13 @@ public void recordTurn(UnifiedRepairUtils.RepairTurn turn) } @VisibleForTesting - protected static class UnifiedRepairMetricsFactory implements MetricNameFactory + protected static class AutoRepairMetricsFactory implements MetricNameFactory { - private static final String TYPE = "UnifiedRepair"; + private static final String TYPE = "AutoRepair"; @VisibleForTesting protected final String repairType; - protected UnifiedRepairMetricsFactory(RepairType repairType) + protected AutoRepairMetricsFactory(RepairType 
repairType) { this.repairType = toLowerCaseLocalized(repairType.toString()); } diff --git a/src/java/org/apache/cassandra/metrics/UnifiedRepairMetricsManager.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java similarity index 70% rename from src/java/org/apache/cassandra/metrics/UnifiedRepairMetricsManager.java rename to src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java index e8057595944d..e293945c9846 100644 --- a/src/java/org/apache/cassandra/metrics/UnifiedRepairMetricsManager.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java @@ -18,17 +18,17 @@ package org.apache.cassandra.metrics; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -public class UnifiedRepairMetricsManager +public class AutoRepairMetricsManager { - private static final Map metrics = new ConcurrentHashMap<>(); + private static final Map metrics = new ConcurrentHashMap<>(); - public static UnifiedRepairMetrics getMetrics(RepairType repairType) + public static AutoRepairMetrics getMetrics(RepairType repairType) { - return metrics.computeIfAbsent(repairType, k -> new UnifiedRepairMetrics(repairType)); + return metrics.computeIfAbsent(repairType, k -> new AutoRepairMetrics(repairType)); } } diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index 0901fa9733fa..11b36c606a3a 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -154,7 +154,7 @@ public class CassandraMetricsRegistry extends MetricRegistry .add(ThreadPoolMetrics.TYPE_NAME) .add(TrieMemtableMetricsView.TYPE_NAME) .add(UnweightedCacheMetrics.TYPE_NAME) - .add(UnifiedRepairMetrics.TYPE_NAME) + 
.add(AutoRepairMetrics.TYPE_NAME) .build(); } diff --git a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java similarity index 78% rename from src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepair.java rename to src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 02db8951fa64..1abb71d69cac 100644 --- a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; import java.util.EnumMap; @@ -40,7 +40,7 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,35 +53,35 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; import org.apache.cassandra.utils.concurrent.Future; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN; -import static 
org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; -public class UnifiedRepair +public class AutoRepair { - private static final Logger logger = LoggerFactory.getLogger(UnifiedRepair.class); + private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); @VisibleForTesting protected static Supplier timeFunc = Clock.Global::currentTimeMillis; - public static UnifiedRepair instance = new UnifiedRepair(); + public static AutoRepair instance = new AutoRepair(); // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra nodes with tiny amount of data. 
public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); @VisibleForTesting - protected final Map repairExecutors; + protected final Map repairExecutors; - protected final Map repairRunnableExecutors; + protected final Map repairRunnableExecutors; @VisibleForTesting - protected final Map repairStates; + protected final Map repairStates; @VisibleForTesting protected static Consumer> shuffleFunc = java.util.Collections::shuffle; @@ -89,23 +89,23 @@ public class UnifiedRepair @VisibleForTesting protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; - protected final Map tokenRangeSplitters = new EnumMap<>(UnifiedRepairConfig.RepairType.class); + protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); private boolean isSetupDone = false; @VisibleForTesting - protected UnifiedRepair() + protected AutoRepair() { - UnifiedRepairConfig config = DatabaseDescriptor.getUnifiedRepairConfig(); - repairExecutors = new EnumMap<>(UnifiedRepairConfig.RepairType.class); - repairRunnableExecutors = new EnumMap<>(UnifiedRepairConfig.RepairType.class); - repairStates = new EnumMap<>(UnifiedRepairConfig.RepairType.class); - for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - repairExecutors.put(repairType, executorFactory().scheduled(false, "UnifiedRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); - repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "UnifiedRepair-RepairRunnable-" + repairType, Thread.NORM_PRIORITY)); - 
repairStates.put(repairType, UnifiedRepairConfig.RepairType.getUnifiedRepairState(repairType)); - tokenRangeSplitters.put(repairType, FBUtilities.newUnifiedRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); + repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType, Thread.NORM_PRIORITY)); + repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); + tokenRangeSplitters.put(repairType, FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); } } @@ -119,13 +119,13 @@ public void setup() { return; } - UnifiedRepairConfig config = DatabaseDescriptor.getUnifiedRepairConfig(); - UnifiedRepairUtils.setup(); + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + AutoRepairUtils.setup(); - for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - if (config.isUnifiedRepairEnabled(repairType)) - UnifiedRepairService.instance.checkCanRun(repairType); + if (config.isAutoRepairEnabled(repairType)) + AutoRepairService.instance.checkCanRun(repairType); repairExecutors.get(repairType).scheduleWithFixedDelay( () -> repair(repairType), @@ -138,26 +138,26 @@ public void setup() } // repairAsync runs a repair session of the given type asynchronously. 
- public void repairAsync(UnifiedRepairConfig.RepairType repairType) + public void repairAsync(AutoRepairConfig.RepairType repairType) { - if (!UnifiedRepairService.instance.getUnifiedRepairConfig().isUnifiedRepairEnabled(repairType)) + if (!AutoRepairService.instance.getAutoRepairConfig().isAutoRepairEnabled(repairType)) { - throw new ConfigurationException("Unified-repair is disabled for repair type " + repairType); + throw new ConfigurationException("Auto-repair is disabled for repair type " + repairType); } repairExecutors.get(repairType).submit(() -> repair(repairType)); } // repair runs a repair session of the given type synchronously. - public void repair(UnifiedRepairConfig.RepairType repairType) + public void repair(AutoRepairConfig.RepairType repairType) { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); - if (!config.isUnifiedRepairEnabled(repairType)) + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (!config.isAutoRepairEnabled(repairType)) { - logger.debug("Unified-repair is disabled for repair type {}", repairType); + logger.debug("Auto-repair is disabled for repair type {}", repairType); return; } - UnifiedRepairService.instance.checkCanRun(repairType); - UnifiedRepairState repairState = repairStates.get(repairType); + AutoRepairService.instance.checkCanRun(repairType); + AutoRepairState repairState = repairStates.get(repairType); try { String localDC = DatabaseDescriptor.getLocalDataCenter(); @@ -168,16 +168,16 @@ public void repair(UnifiedRepairConfig.RepairType repairType) } // refresh the longest unrepaired node - repairState.setLongestUnrepairedNode(UnifiedRepairUtils.getHostWithLongestUnrepairTime(repairType)); + repairState.setLongestUnrepairedNode(AutoRepairUtils.getHostWithLongestUnrepairTime(repairType)); //consistency level to use for local query UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - RepairTurn turn = 
UnifiedRepairUtils.myTurnToRunRepair(repairType, myId); + RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, myId); if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == MY_TURN_FORCE_REPAIR) { repairState.recordTurn(turn); - // For normal unified repair, we will use primary range only repairs (Repair with -pr option). - // For some cases, we may set the unified_repair_primary_token_range_only flag to false then we will do repair + // For normal auto repair, we will use primary range only repairs (Repair with -pr option). + // For some cases, we may set the auto_repair_primary_token_range_only flag to false then we will do repair // without -pr. We may also do force repair for certain node that we want to repair all the data on one node // When doing force repair, we want to repair without -pr. boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) @@ -190,7 +190,7 @@ public void repair(UnifiedRepairConfig.RepairType repairType) long startTime = timeFunc.get(); logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, config.getRepairPrimaryTokenRangeOnly(repairType)); - UnifiedRepairUtils.updateStartUnifiedRepairHistory(repairType, myId, timeFunc.get(), turn); + AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); repairState.setRepairKeyspaceCount(0); repairState.setRepairInProgress(true); @@ -201,13 +201,13 @@ public void repair(UnifiedRepairConfig.RepairType repairType) List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); - // Unified-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair + // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. 
shuffleFunc.accept(keyspaces); for (Keyspace keyspace : keyspaces) { - if (!UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) + if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) { continue; } @@ -237,20 +237,20 @@ public void repair(UnifiedRepairConfig.RepairType repairType) tableStartTime = timeFunc.get(); } previousAssignment = curRepairAssignment; - if (!config.isUnifiedRepairEnabled(repairType)) + if (!config.isAutoRepairEnabled(repairType)) { - logger.error("Unified-repair for type {} is disabled hence not running repair", repairType); + logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); repairState.setRepairInProgress(false); return; } - if (UnifiedRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, tablesToBeRepairedList.size())) + if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, tablesToBeRepairedList.size())) { collectectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; logger.info("Keyspace took too much time to repair hence skipping it {}", keyspaceName); break; } - if (repairOneTableAtATime && UnifiedRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) + if (repairOneTableAtATime && AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) { collectectedRepairStats.skippedTokenRanges += 1; logger.info("Table took too much time to repair hence skipping it table name {}.{}, token range {}", @@ -339,17 +339,17 @@ else if (retryCount < config.getRepairMaxRetries()) } catch (Exception e) { - logger.error("Exception in unifiedrepair:", e); + logger.error("Exception in autorepair:", e); } } - private boolean tooSoonToRunRepair(UnifiedRepairConfig.RepairType repairType, UnifiedRepairState repairState, UnifiedRepairConfig config, UUID myId) + private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config, UUID 
myId) { if (repairState.getLastRepairTime() == 0) { // the node has either just boooted or has not run repair before, // we should check for the node's repair history in the DB - repairState.setLastRepairTime(UnifiedRepairUtils.getLastRepairTimeForNode(repairType, myId)); + repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType, myId)); } /** check if it is too soon to run repair. one of the reason we * should not run frequent repair is that repair triggers @@ -365,7 +365,7 @@ private boolean tooSoonToRunRepair(UnifiedRepairConfig.RepairType repairType, Un return false; } - private List retrieveTablesToBeRepaired(Keyspace keyspace, UnifiedRepairConfig config, UnifiedRepairConfig.RepairType repairType, UnifiedRepairState repairState, CollectectedRepairStats collectectedRepairStats) + private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig config, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, CollectectedRepairStats collectectedRepairStats) { Tables tables = keyspace.getMetadata().tables; List tablesToBeRepaired = new ArrayList<>(); @@ -377,7 +377,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, UnifiedRepair String tableName = tableMetadata.name; ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); - if (!columnFamilyStore.metadata().params.unifiedRepair.get(repairType).repairEnabled()) + if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) { logger.info("Repair is disabled for keyspace {} for tables: {}", keyspace.getName(), tableName); repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); @@ -385,7 +385,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, UnifiedRepair continue; } - // this is done to make unifiedrepair safe as running repair on table with more sstables + // this is done to make autorepair safe as running repair on table with more sstables // 
may have its own challenges int totalSSTables = columnFamilyStore.getLiveSSTables().size(); if (totalSSTables > config.getRepairSSTableCountHigherThreshold(repairType)) @@ -399,7 +399,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, UnifiedRepair tablesToBeRepaired.add(tableName); // See if we should repair MVs as well that are associated with this given table - List mvs = UnifiedRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); + List mvs = AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); if (!mvs.isEmpty()) { tablesToBeRepaired.addAll(mvs); @@ -409,14 +409,14 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, UnifiedRepair return tablesToBeRepaired; } - private void cleanupAndUpdateStats(RepairTurn turn, UnifiedRepairConfig.RepairType repairType, UnifiedRepairState repairState, UUID myId, + private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, long startTime, CollectectedRepairStats collectectedRepairStats) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) { logger.info("Remove current host from priority list"); - UnifiedRepairUtils.removePriorityStatus(repairType, myId); + AutoRepairUtils.removePriorityStatus(repairType, myId); } repairState.setFailedTokenRangesCount(collectectedRepairStats.failedTokenRanges); @@ -446,10 +446,10 @@ private void cleanupAndUpdateStats(RepairTurn turn, UnifiedRepairConfig.RepairTy Thread.sleep(SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds()); } repairState.setRepairInProgress(false); - UnifiedRepairUtils.updateFinishUnifiedRepairHistory(repairType, myId, timeFunc.get()); + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); } - public UnifiedRepairState getRepairState(UnifiedRepairConfig.RepairType repairType) + public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) { return 
repairStates.get(repairType); } diff --git a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java similarity index 91% rename from src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfig.java rename to src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 33f37b8f57ee..a80a875125a9 100644 --- a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.io.Serializable; import java.util.ArrayList; @@ -34,9 +34,9 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.ParameterizedClass; -public class UnifiedRepairConfig implements Serializable +public class AutoRepairConfig implements Serializable { - // enable/disable unified repair globally, overrides all other settings. Cannot be modified dynamically. + // enable/disable auto repair globally, overrides all other settings. Cannot be modified dynamically. // if it is set to false, then no repair will be scheduled, including full and incremental repairs by this framework. // if it is set to true, then this repair scheduler will consult another config available for each RepairType, and based on that config, it will schedule repairs. 
public volatile Boolean enabled; @@ -61,7 +61,7 @@ public enum RepairType implements Serializable full, incremental; - public static UnifiedRepairState getUnifiedRepairState(RepairType repairType) + public static AutoRepairState getAutoRepairState(RepairType repairType) { switch (repairType) { @@ -78,12 +78,12 @@ public static UnifiedRepairState getUnifiedRepairState(RepairType repairType) // repair_type_overrides overrides the global_settings for a specific repair type public volatile Map repair_type_overrides = new EnumMap<>(RepairType.class); - public UnifiedRepairConfig() + public AutoRepairConfig() { this(false); } - public UnifiedRepairConfig(boolean enabled) + public AutoRepairConfig(boolean enabled) { this.enabled = enabled; global_settings = Options.getDefaultOptions(); @@ -98,12 +98,12 @@ public DurationSpec.IntSecondsBound getRepairCheckInterval() return repair_check_interval; } - public boolean isUnifiedRepairSchedulingEnabled() + public boolean isAutoRepairSchedulingEnabled() { return enabled; } - public DurationSpec.IntSecondsBound getUnifiedRepairHistoryClearDeleteHostsBufferInterval() + public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferInterval() { return history_clear_delete_hosts_buffer_interval; } @@ -111,10 +111,10 @@ public DurationSpec.IntSecondsBound getUnifiedRepairHistoryClearDeleteHostsBuffe public void startScheduler() { enabled = true; - UnifiedRepair.instance.setup(); + AutoRepair.instance.setup(); } - public void setUnifiedRepairHistoryClearDeleteHostsBufferInterval(String duration) + public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) { history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); } @@ -139,12 +139,12 @@ public void setRepairRetryBackoff(String interval) repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); } - public boolean isUnifiedRepairEnabled(RepairType repairType) + public boolean isAutoRepairEnabled(RepairType 
repairType) { return enabled && applyOverrides(repairType, opt -> opt.enabled); } - public void setUnifiedRepairEnabled(RepairType repairType, boolean enabled) + public void setAutoRepairEnabled(RepairType repairType, boolean enabled) { ensureOverrides(repairType); repair_type_overrides.get(repairType).enabled = enabled; @@ -205,15 +205,15 @@ public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssta repair_type_overrides.get(repairType).sstable_upper_threshold = sstableHigherThreshold; } - public DurationSpec.IntSecondsBound getUnifiedRepairTableMaxRepairTime(RepairType repairType) + public DurationSpec.IntSecondsBound getAutoRepairTableMaxRepairTime(RepairType repairType) { return applyOverrides(repairType, opt -> opt.table_max_repair_time); } - public void setUnifiedRepairTableMaxRepairTime(RepairType repairType, String unifiedRepairTableMaxRepairTime) + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) { ensureOverrides(repairType); - repair_type_overrides.get(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(unifiedRepairTableMaxRepairTime); + repair_type_overrides.get(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(autoRepairTableMaxRepairTime); } public Set getIgnoreDCs(RepairType repairType) @@ -309,11 +309,11 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT repair_type_overrides.get(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); } - // Options configures unified-repair behavior for a given repair type. + // Options configures auto-repair behavior for a given repair type. // All fields can be modified dynamically. 
public static class Options implements Serializable { - // defaultOptions defines the default unified-repair behavior when no overrides are defined + // defaultOptions defines the default auto-repair behavior when no overrides are defined @VisibleForTesting protected static final Options defaultOptions = getDefaultOptions(); @@ -339,16 +339,16 @@ protected static Options getDefaultOptions() opts.force_repair_new_node = false; opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = false; - opts.token_range_splitter = new ParameterizedClass(DefaultUnifiedRepairTokenSplitter.class.getName(), Collections.emptyMap()); + opts.token_range_splitter = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours return opts; } - // enable/disable unified repair for the given repair type + // enable/disable auto repair for the given repair type public volatile Boolean enabled; - // unified repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. + // auto repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. public volatile Boolean repair_by_keyspace; // the number of subranges to split each to-be-repaired token range into, // the higher this number, the smaller the repair sessions will be @@ -387,7 +387,7 @@ protected static Options getDefaultOptions() // specifies a denylist of datacenters to repair // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. 
public volatile Set ignore_dcs; - // Set this 'true' if UnifiedRepair should repair only the primary ranges owned by this node; else, 'false' + // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' // It is the same as -pr in nodetool repair options. public volatile Boolean repair_primary_token_range_only; // configures whether to force immediate repair on new nodes @@ -401,9 +401,9 @@ protected static Options getDefaultOptions() // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. public volatile DurationSpec.IntSecondsBound table_max_repair_time; // the default is 'true'. - // This flag determines whether the unified-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. + // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. public volatile Boolean mv_repair_enabled; - // the default is DefaultUnifiedRepairTokenSplitter. The class should implement IUnifiedRepairTokenRangeSplitter. + // the default is DefaultAutoRepairTokenSplitter. The class should implement IAutoRepairTokenRangeSplitter. 
// The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' public volatile ParameterizedClass token_range_splitter; // the minimum delay after a node starts before the scheduler starts running repair diff --git a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java similarity index 88% rename from src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairState.java rename to src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 1a95bcac88ad..8d838a138fc9 100644 --- a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import com.google.common.annotations.VisibleForTesting; @@ -26,14 +26,14 @@ import org.apache.cassandra.db.view.TableViews; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.metrics.UnifiedRepairMetricsManager; -import org.apache.cassandra.metrics.UnifiedRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.metrics.AutoRepairMetrics; import org.apache.cassandra.repair.RepairCoordinator; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.UnifiedRepairHistory; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; import org.apache.cassandra.repair.RepairParallelism; import org.apache.cassandra.repair.messages.RepairOption; -import org.apache.cassandra.service.UnifiedRepairService; +import 
org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.Clock; @@ -55,10 +55,10 @@ import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; -// UnifiedRepairState represents the state of unified repair for a given repair type. -public abstract class UnifiedRepairState implements ProgressListener +// AutoRepairState represents the state of automated repair for a given repair type. +public abstract class AutoRepairState implements ProgressListener { - protected static final Logger logger = LoggerFactory.getLogger(UnifiedRepairState.class); + protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @VisibleForTesting protected static Supplier timeFunc = Clock.Global::currentTimeMillis; @@ -93,16 +93,16 @@ public abstract class UnifiedRepairState implements ProgressListener protected int skippedTablesCount = 0; @VisibleForTesting - protected UnifiedRepairHistory longestUnrepairedNode; + protected AutoRepairHistory longestUnrepairedNode; @VisibleForTesting protected Condition condition = newOneTimeCondition(); @VisibleForTesting protected boolean success = true; - protected final UnifiedRepairMetrics metrics; + protected final AutoRepairMetrics metrics; - protected UnifiedRepairState(RepairType repairType) + protected AutoRepairState(RepairType repairType) { - metrics = UnifiedRepairMetricsManager.getMetrics(repairType); + metrics = AutoRepairMetricsManager.getMetrics(repairType); this.repairType = repairType; } @@ -229,7 +229,7 @@ public int getRepairKeyspaceCount() return repairKeyspaceCount; } - public void setLongestUnrepairedNode(UnifiedRepairHistory longestUnrepairedNode) + public void 
setLongestUnrepairedNode(AutoRepairHistory longestUnrepairedNode) { this.longestUnrepairedNode = longestUnrepairedNode; } @@ -279,7 +279,7 @@ public boolean isSuccess() return success; } - public void recordTurn(UnifiedRepairUtils.RepairTurn turn) + public void recordTurn(AutoRepairUtils.RepairTurn turn) { metrics.recordTurn(turn); } @@ -300,7 +300,7 @@ public void resetWaitCondition() } } -class IncrementalRepairState extends UnifiedRepairState +class IncrementalRepairState extends AutoRepairState { public IncrementalRepairState() { @@ -311,7 +311,7 @@ public IncrementalRepairState() public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, true, false, - UnifiedRepairService.instance.getUnifiedRepairConfig().getRepairThreads(repairType), ranges, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); option.getColumnFamilies().addAll(filterOutUnsafeTables(keyspace, tables)); @@ -345,7 +345,7 @@ protected List filterOutUnsafeTables(String keyspaceName, List t } } -class FullRepairState extends UnifiedRepairState +class FullRepairState extends AutoRepairState { public FullRepairState() { @@ -356,7 +356,7 @@ public FullRepairState() public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, - UnifiedRepairService.instance.getUnifiedRepairConfig().getRepairThreads(repairType), ranges, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); diff --git a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtils.java 
b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java similarity index 83% rename from src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtils.java rename to src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index ed7af2963515..0a328de44f98 100644 --- a/src/java/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; import java.util.Arrays; @@ -61,7 +61,7 @@ import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.serializers.SetSerializer; import org.apache.cassandra.serializers.UUIDSerializer; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageService; @@ -73,21 +73,21 @@ import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.NOT_MY_TURN; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static 
org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; /** - * This class serves as a utility class for UnifiedRepair. It contains various helper APIs + * This class serves as a utility class for AutoRepair. It contains various helper APIs * to store/retrieve repair status, decide whose turn is next, etc. */ -public class UnifiedRepairUtils +public class AutoRepairUtils { - private static final Logger logger = LoggerFactory.getLogger(UnifiedRepairUtils.class); + private static final Logger logger = LoggerFactory.getLogger(AutoRepairUtils.class); static final String COL_REPAIR_TYPE = "repair_type"; static final String COL_HOST_ID = "host_id"; static final String COL_REPAIR_START_TS = "repair_start_ts"; @@ -100,56 +100,56 @@ public class UnifiedRepairUtils final static String SELECT_REPAIR_HISTORY = String.format( "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE); final static String SELECT_REPAIR_PRIORITY = String.format( "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String DEL_REPAIR_PRIORITY = String.format( "DELETE %s[?] 
FROM %s.%s WHERE %s = ?", COL_REPAIR_PRIORITY, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String ADD_PRIORITY_HOST = String.format( "UPDATE %s.%s SET %s = %s + ? WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); final static String INSERT_NEW_REPAIR_HISTORY = String.format( "INSERT INTO %s.%s (%s, %s, %s, %s, %s, %s) values (?, ? ,?, ?, {}, ?) IF NOT EXISTS", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID, COL_REPAIR_START_TS, COL_REPAIR_FINISH_TS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME); final static String ADD_HOST_ID_TO_DELETE_HOSTS = String.format( "UPDATE %s.%s SET %s = %s + ?, %s = ? WHERE %s = ? AND %s = ? IF EXISTS" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_DELETE_HOSTS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME, COL_REPAIR_TYPE, COL_HOST_ID); - final static String DEL_UNIFIED_REPAIR_HISTORY = String.format( + final static String DEL_AUTO_REPAIR_HISTORY = String.format( "DELETE FROM %s.%s WHERE %s = ? AND %s = ?" 
- , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_START_REPAIR_HISTORY = String.format( "UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_START_TS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS, COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_FINISH_REPAIR_HISTORY = String.format( "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); final static String CLEAR_DELETE_HOSTS = String.format( "UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_DELETE_HOSTS, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, COL_REPAIR_TYPE, COL_HOST_ID); final static String SET_FORCE_REPAIR = String.format( "UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ?" - , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_FORCE_REPAIR, + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); final static String SELECT_LAST_REPAIR_TIME_FOR_NODE = String.format( "SELECT %s FROM %s.%s WHERE %s = ? 
AND %s = ?", COL_REPAIR_FINISH_TS, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, - SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); + SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); static ModificationStatement delStatementRepairHistory; static SelectStatement selectStatementRepairHistory; @@ -199,14 +199,14 @@ public static void setup() .forInternalCalls()); clearDeleteHostsStatement = (ModificationStatement) QueryProcessor.getStatement(CLEAR_DELETE_HOSTS, ClientState .forInternalCalls()); - delStatementRepairHistory = (ModificationStatement) QueryProcessor.getStatement(DEL_UNIFIED_REPAIR_HISTORY, ClientState + delStatementRepairHistory = (ModificationStatement) QueryProcessor.getStatement(DEL_AUTO_REPAIR_HISTORY, ClientState .forInternalCalls()); - Keyspace unifiedRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); - internalQueryCL = unifiedRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + internalQueryCL = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? 
ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.ONE; } - public static class UnifiedRepairHistory + public static class AutoRepairHistory { UUID hostId; String repairTurn; @@ -216,8 +216,8 @@ public static class UnifiedRepairHistory long deleteHostsUpdateTime; boolean forceRepair; - public UnifiedRepairHistory(UUID hostId, String repairTurn, long lastRepairStartTime, long lastRepairFinishTime, - Set deleteHosts, long deleteHostsUpateTime, boolean forceRepair) + public AutoRepairHistory(UUID hostId, String repairTurn, long lastRepairStartTime, long lastRepairFinishTime, + Set deleteHosts, long deleteHostsUpateTime, boolean forceRepair) { this.hostId = hostId; this.repairTurn = repairTurn; @@ -260,15 +260,15 @@ public static class CurrentRepairStatus public Set hostIdsWithOnGoingRepair; // hosts that is running repair public Set hostIdsWithOnGoingForceRepair; // hosts that is running repair because of force repair Set priority; - List historiesWithoutOnGoingRepair; // hosts that is NOT running repair + List historiesWithoutOnGoingRepair; // hosts that is NOT running repair - public CurrentRepairStatus(List repairHistories, Set priority) + public CurrentRepairStatus(List repairHistories, Set priority) { hostIdsWithOnGoingRepair = new HashSet<>(); hostIdsWithOnGoingForceRepair = new HashSet<>(); historiesWithoutOnGoingRepair = new ArrayList<>(); - for (UnifiedRepairHistory history : repairHistories) + for (AutoRepairHistory history : repairHistories) { if (history.isRepairRunning()) { @@ -301,7 +301,7 @@ public String toString() } @VisibleForTesting - public static List getUnifiedRepairHistory(RepairType repairType) + public static List getAutoRepairHistory(RepairType repairType) { UntypedResultSet repairHistoryResult; @@ -309,7 +309,7 @@ public static List getUnifiedRepairHistory(RepairType repa QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()))), Dispatcher.RequestTime.forImmediateExecution()); 
repairHistoryResult = UntypedResultSet.create(repairStatusRows.result); - List repairHistories = new ArrayList<>(); + List repairHistories = new ArrayList<>(); if (repairHistoryResult.size() > 0) { for (UntypedResultSet.Row row : repairHistoryResult) @@ -323,8 +323,8 @@ public static List getUnifiedRepairHistory(RepairType repa Set deleteHosts = row.getSet(COL_DELETE_HOSTS, UUIDType.instance); long deleteHostsUpdateTime = row.getLong(COL_DELETE_HOSTS_UPDATE_TIME, 0); Boolean forceRepair = row.has(COL_FORCE_REPAIR) ? row.getBoolean(COL_FORCE_REPAIR) : false; - repairHistories.add(new UnifiedRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, - deleteHosts, deleteHostsUpdateTime, forceRepair)); + repairHistories.add(new AutoRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, + deleteHosts, deleteHostsUpdateTime, forceRepair)); } return repairHistories; } @@ -374,8 +374,8 @@ public static void setForceRepair(RepairType repairType, UUID hostId) public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) { - List unifiedRepairHistories = getUnifiedRepairHistory(repairType); - return getCurrentRepairStatus(repairType, unifiedRepairHistories); + List autoRepairHistories = getAutoRepairHistory(repairType); + return getCurrentRepairStatus(repairType, autoRepairHistories); } public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) @@ -397,11 +397,11 @@ public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) return repairTime.one().getLong(COL_REPAIR_FINISH_TS); } - public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List unifiedRepairHistories) + public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories) { - if (unifiedRepairHistories != null) + if (autoRepairHistories != null) { - CurrentRepairStatus status = new CurrentRepairStatus(unifiedRepairHistories, 
getPriorityHostIds(repairType)); + CurrentRepairStatus status = new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType)); return status; } @@ -415,7 +415,7 @@ protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Co for (NodeAddresses node : allNodesInRing) { String nodeDC = DatabaseDescriptor.getLocator().location(node.broadcastAddress).datacenter; - if (UnifiedRepairService.instance.getUnifiedRepairConfig().getIgnoreDCs(repairType).contains(nodeDC)) + if (AutoRepairService.instance.getAutoRepairConfig().getIgnoreDCs(repairType).contains(nodeDC)) { logger.info("Ignore node {} because its datacenter is {}", node, nodeDC); continue; @@ -443,39 +443,39 @@ public static TreeSet getHostIdsInCurrentRing(RepairType repairType) } // This function will return the host ID for the node which has not been repaired for longest time - public static UnifiedRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) + public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) { - List unifiedRepairHistories = getUnifiedRepairHistory(repairType); - return getHostWithLongestUnrepairTime(unifiedRepairHistories); + List autoRepairHistories = getAutoRepairHistory(repairType); + return getHostWithLongestUnrepairTime(autoRepairHistories); } - private static UnifiedRepairHistory getHostWithLongestUnrepairTime(List unifiedRepairHistories) + private static AutoRepairHistory getHostWithLongestUnrepairTime(List autoRepairHistories) { - if (unifiedRepairHistories == null) + if (autoRepairHistories == null) { return null; } - UnifiedRepairHistory rst = null; + AutoRepairHistory rst = null; long oldestTimestamp = Long.MAX_VALUE; - for (UnifiedRepairHistory unifiedRepairHistory : unifiedRepairHistories) + for (AutoRepairHistory autoRepairHistory : autoRepairHistories) { - if (unifiedRepairHistory.lastRepairFinishTime < oldestTimestamp) + if (autoRepairHistory.lastRepairFinishTime < oldestTimestamp) { - rst = 
unifiedRepairHistory; - oldestTimestamp = unifiedRepairHistory.lastRepairFinishTime; + rst = autoRepairHistory; + oldestTimestamp = autoRepairHistory.lastRepairFinishTime; } } return rst; } - public static int getMaxNumberOfNodeRunUnifiedRepair(RepairType repairType, int groupSize) + public static int getMaxNumberOfNodeRunAutoRepair(RepairType repairType, int groupSize) { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); if (groupSize == 0) { return Math.max(config.getParallelRepairCount(repairType), 1); } - // we will use the max number from config between unified_repair_parallel_repair_count_in_group and unified_repair_parallel_repair_percentage_in_group + // we will use the max number from config between auto_repair_parallel_repair_count_in_group and auto_repair_parallel_repair_percentage_in_group int value = Math.max(groupSize * config.getParallelRepairPercentage(repairType) / 100, config.getParallelRepairCount(repairType)); // make sure at least one node getting repaired @@ -492,33 +492,33 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) TreeSet hostIdsInCurrentRing = getHostIdsInCurrentRing(repairType, allNodesInRing); logger.info("Total nodes qualified for repair {}", hostIdsInCurrentRing.size()); - List unifiedRepairHistories = getUnifiedRepairHistory(repairType); - Set unifiedRepairHistoryIds = new HashSet<>(); + List autoRepairHistories = getAutoRepairHistory(repairType); + Set autoRepairHistoryIds = new HashSet<>(); // 1. 
Remove any node that is not part of group based on goissip info - if (unifiedRepairHistories != null) + if (autoRepairHistories != null) { - for (UnifiedRepairHistory nodeHistory : unifiedRepairHistories) + for (AutoRepairHistory nodeHistory : autoRepairHistories) { - unifiedRepairHistoryIds.add(nodeHistory.hostId); + autoRepairHistoryIds.add(nodeHistory.hostId); // clear delete_hosts if the node's delete hosts is not growing for more than two hours - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); if (nodeHistory.deleteHosts.size() > 0 - && config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( + && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( currentTimeMillis() - nodeHistory.deleteHostsUpdateTime )) { clearDeleteHosts(repairType, nodeHistory.hostId); logger.info("Delete hosts for {} for repair type {} has not been updated for more than {} seconds. Delete hosts has been cleared. 
Delete hosts before clear {}" - , nodeHistory.hostId, repairType, config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval(), nodeHistory.deleteHosts); + , nodeHistory.hostId, repairType, config.getAutoRepairHistoryClearDeleteHostsBufferInterval(), nodeHistory.deleteHosts); } else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) { if (nodeHistory.deleteHosts.size() > Math.max(2, hostIdsInCurrentRing.size() * 0.5)) { // More than half of the groups thinks the record should be deleted - logger.info("{} think {} is orphan node, will delete unified repair history for repair type {}.", nodeHistory.deleteHosts, nodeHistory.hostId, repairType); - deleteUnifiedRepairHistory(repairType, nodeHistory.hostId); + logger.info("{} think {} is orphan node, will delete auto repair history for repair type {}.", nodeHistory.deleteHosts, nodeHistory.hostId, repairType); + deleteAutoRepairHistory(repairType, nodeHistory.hostId); } else { @@ -530,23 +530,23 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - // 2. Add node to unified repair history table if a node is in gossip info + // 2. 
Add node to auto repair history table if a node is in gossip info for (UUID hostId : hostIdsInCurrentRing) { - if (!unifiedRepairHistoryIds.contains(hostId)) + if (!autoRepairHistoryIds.contains(hostId)) { - logger.info("{} for repair type {} doesn't exist in the unified repair history table, insert a new record.", repairType, hostId); + logger.info("{} for repair type {} doesn't exist in the auto repair history table, insert a new record.", repairType, hostId); insertNewRepairHistory(repairType, hostId, currentTimeMillis(), currentTimeMillis()); } } //get current repair status - CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, unifiedRepairHistories); + CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); if (currentRepairStatus != null) { logger.info("Latest repair status {}", currentRepairStatus); //check if I am forced to run repair - for (UnifiedRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) + for (AutoRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) { if (history.forceRepair && history.hostId.equals(myId)) { @@ -555,24 +555,24 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - int parallelRepairNumber = getMaxNumberOfNodeRunUnifiedRepair(repairType, - unifiedRepairHistories == null ? 0 : unifiedRepairHistories.size()); + int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, + autoRepairHistories == null ? 
0 : autoRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); if (currentRepairStatus == null || parallelRepairNumber > currentRepairStatus.hostIdsWithOnGoingRepair.size()) { // more repairs can be run, I might be the new one - if (unifiedRepairHistories != null) + if (autoRepairHistories != null) { - logger.info("Unified repair history table has {} records", unifiedRepairHistories.size()); + logger.info("Auto repair history table has {} records", autoRepairHistories.size()); } else { // try to fetch again - unifiedRepairHistories = getUnifiedRepairHistory(repairType); - currentRepairStatus = getCurrentRepairStatus(repairType, unifiedRepairHistories); - if (unifiedRepairHistories == null || currentRepairStatus == null) + autoRepairHistories = getAutoRepairHistory(repairType); + currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); + if (autoRepairHistories == null || currentRepairStatus == null) { logger.error("No record found"); return NOT_MY_TURN; @@ -612,7 +612,7 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } // get the longest unrepaired node from the nodes which are not running repair - UnifiedRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); //check who is next, which is helpful for debugging logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); if (defaultNodeToBeRepaired != null && defaultNodeToBeRepaired.hostId.equals(myId)) @@ -634,7 +634,7 @@ else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) return NOT_MY_TURN; } - static void deleteUnifiedRepairHistory(RepairType repairType, UUID hostId) + static void deleteAutoRepairHistory(RepairType repairType, UUID hostId) { //delete the 
given hostId delStatementRepairHistory.execute(QueryState.forInternalCalls(), @@ -643,7 +643,7 @@ static void deleteUnifiedRepairHistory(RepairType repairType, UUID hostId) ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); } - static void updateStartUnifiedRepairHistory(RepairType repairType, UUID myId, long timestamp, RepairTurn turn) + static void updateStartAutoRepairHistory(RepairType repairType, UUID myId, long timestamp, RepairTurn turn) { recordStartRepairHistoryStatement.execute(QueryState.forInternalCalls(), QueryOptions.forInternalCalls(internalQueryCL, @@ -654,7 +654,7 @@ static void updateStartUnifiedRepairHistory(RepairType repairType, UUID myId, lo )), Dispatcher.RequestTime.forImmediateExecution()); } - static void updateFinishUnifiedRepairHistory(RepairType repairType, UUID myId, long timestamp) + static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long timestamp) { recordFinishRepairHistoryStatement.execute(QueryState.forInternalCalls(), QueryOptions.forInternalCalls(internalQueryCL, @@ -663,15 +663,15 @@ static void updateFinishUnifiedRepairHistory(RepairType repairType, UUID myId, l ByteBufferUtil.bytes(myId) )), Dispatcher.RequestTime.forImmediateExecution()); // Do not remove beblow log, the log is used by dtest - logger.info("Unified repair finished for {}", myId); + logger.info("Auto repair finished for {}", myId); } public static void insertNewRepairHistory(RepairType repairType, UUID hostId, long startTime, long finishTime) { try { - Keyspace unifiedRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); - ConsistencyLevel cl = unifiedRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + ConsistencyLevel cl = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? 
ConsistencyLevel.LOCAL_SERIAL : null; UntypedResultSet resultSet; @@ -688,7 +688,7 @@ public static void insertNewRepairHistory(RepairType repairType, UUID hostId, lo boolean applied = resultSet.one().getBoolean(ModificationStatement.CAS_RESULT_COLUMN.toString()); if (applied) { - logger.info("Successfully inserted a new unified repair history record for host id: {}", hostId); + logger.info("Successfully inserted a new auto repair history record for host id: {}", hostId); } else { @@ -811,21 +811,21 @@ public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long sta { long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds (currentTimeMillis() - startTime); - return UnifiedRepairService.instance.getUnifiedRepairConfig().getUnifiedRepairTableMaxRepairTime(repairType).toSeconds() < + return AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() < tableRepairTimeSoFar; } public static boolean keyspaceMaxRepairTimeExceeded(RepairType repairType, long startTime, int numOfTablesToBeRepaired) { long keyspaceRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds((currentTimeMillis() - startTime)); - return (long) UnifiedRepairService.instance.getUnifiedRepairConfig().getUnifiedRepairTableMaxRepairTime(repairType).toSeconds() * + return (long) AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() * numOfTablesToBeRepaired < keyspaceRepairTimeSoFar; } public static List getAllMVs(RepairType repairType, Keyspace keyspace, TableMetadata tableMetadata) { List allMvs = new ArrayList<>(); - if (UnifiedRepairService.instance.getUnifiedRepairConfig().getMVRepairEnabled(repairType) && keyspace.getMetadata().views != null) + if (AutoRepairService.instance.getAutoRepairConfig().getMVRepairEnabled(repairType) && keyspace.getMetadata().views != null) { Iterator views = keyspace.getMetadata().views.forTable(tableMetadata.id).iterator(); while (views.hasNext()) @@ -840,12 
+840,12 @@ public static List getAllMVs(RepairType repairType, Keyspace keyspace, T public static void runRepairOnNewlyBootstrappedNodeIfEnabled() { - UnifiedRepairConfig repairConfig = DatabaseDescriptor.getUnifiedRepairConfig(); - if (repairConfig.isUnifiedRepairSchedulingEnabled()) + AutoRepairConfig repairConfig = DatabaseDescriptor.getAutoRepairConfig(); + if (repairConfig.isAutoRepairSchedulingEnabled()) { - for (UnifiedRepairConfig.RepairType rType : UnifiedRepairConfig.RepairType.values()) - if (repairConfig.isUnifiedRepairEnabled(rType) && repairConfig.getForceRepairNewNode(rType)) - UnifiedRepairUtils.setForceRepairNewNode(rType); + for (AutoRepairConfig.RepairType rType : AutoRepairConfig.RepairType.values()) + if (repairConfig.isAutoRepairEnabled(rType) && repairConfig.getForceRepairNewNode(rType)) + AutoRepairUtils.setForceRepairNewNode(rType); } } diff --git a/src/java/org/apache/cassandra/repair/unifiedrepair/DefaultUnifiedRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java similarity index 82% rename from src/java/org/apache/cassandra/repair/unifiedrepair/DefaultUnifiedRepairTokenSplitter.java rename to src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index 98dc09f8ad7d..9a884f61c581 100644 --- a/src/java/org/apache/cassandra/repair/unifiedrepair/DefaultUnifiedRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; @@ -23,20 +23,20 @@ import java.util.Collections; import java.util.List; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.service.StorageService; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.splitEvenly; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.splitEvenly; -public class DefaultUnifiedRepairTokenSplitter implements IUnifiedRepairTokenRangeSplitter +public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter { @Override - public List getRepairAssignments(UnifiedRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); List repairAssignments = new ArrayList<>(); Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); diff --git a/src/java/org/apache/cassandra/repair/unifiedrepair/IUnifiedRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java similarity index 87% rename from src/java/org/apache/cassandra/repair/unifiedrepair/IUnifiedRepairTokenRangeSplitter.java rename to src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index 251e6b96ec56..169600eca405 100644 --- a/src/java/org/apache/cassandra/repair/unifiedrepair/IUnifiedRepairTokenRangeSplitter.java +++ 
b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.List; @@ -24,22 +24,22 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -public interface IUnifiedRepairTokenRangeSplitter +public interface IAutoRepairTokenRangeSplitter { /** * Split the token range you wish to repair into multiple assignments. - * The unifiedrepair framework will repair the list of returned subrange in a sequence. + * The autorepair framework will repair the list of returned subrange in a sequence. * @param repairType The type of repair being executed * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. * @param keyspaceName The keyspace being repaired * @param tableNames The tables to repair * @return repair assignments broken up by range, keyspace and tables. */ - List getRepairAssignments(UnifiedRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); + List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); /** - * Defines a repair assignment to be issued by the unifiedrepair framework. + * Defines a repair assignment to be issued by the autorepair framework. 
*/ class RepairAssignment { diff --git a/src/java/org/apache/cassandra/schema/UnifiedRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java similarity index 74% rename from src/java/org/apache/cassandra/schema/UnifiedRepairParams.java rename to src/java/org/apache/cassandra/schema/AutoRepairParams.java index 27459ad30be0..ea3802db93d2 100644 --- a/src/java/org/apache/cassandra/schema/UnifiedRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -26,12 +26,12 @@ import org.apache.commons.lang3.StringUtils; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import static java.lang.String.format; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; -public final class UnifiedRepairParams +public final class AutoRepairParams { public enum Option { @@ -44,23 +44,23 @@ public String toString() } } - public static final Map> DEFAULT_OPTIONS = - ImmutableMap.of(UnifiedRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - UnifiedRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); + public static final Map> DEFAULT_OPTIONS = + ImmutableMap.of(AutoRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), + AutoRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); - public final UnifiedRepairConfig.RepairType type; + public final AutoRepairConfig.RepairType type; - private Map> options = DEFAULT_OPTIONS; + private Map> options = DEFAULT_OPTIONS; - UnifiedRepairParams(UnifiedRepairConfig.RepairType type) + AutoRepairParams(AutoRepairConfig.RepairType type) { this.type = type; } - public static UnifiedRepairParams create(UnifiedRepairConfig.RepairType repairType, Map 
options) + public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Map options) { - Map> optionsMap = new HashMap<>(); - for (Map.Entry> entry : DEFAULT_OPTIONS.entrySet()) + Map> optionsMap = new HashMap<>(); + for (Map.Entry> entry : DEFAULT_OPTIONS.entrySet()) { optionsMap.put(entry.getKey(), new HashMap<>(entry.getValue())); } @@ -75,7 +75,7 @@ public static UnifiedRepairParams create(UnifiedRepairConfig.RepairType repairTy optionsMap.get(repairType).put(entry.getKey(), entry.getValue()); } } - UnifiedRepairParams repairParams = new UnifiedRepairParams(repairType); + AutoRepairParams repairParams = new AutoRepairParams(repairType); repairParams.options = optionsMap; return repairParams; } @@ -109,7 +109,7 @@ public Map options() return options.get(type); } - public static UnifiedRepairParams fromMap(UnifiedRepairConfig.RepairType repairType, Map map) + public static AutoRepairParams fromMap(AutoRepairConfig.RepairType repairType, Map map) { return create(repairType, map); } @@ -133,10 +133,10 @@ public boolean equals(Object o) if (this == o) return true; - if (!(o instanceof UnifiedRepairParams)) + if (!(o instanceof AutoRepairParams)) return false; - UnifiedRepairParams cp = (UnifiedRepairParams) o; + AutoRepairParams cp = (AutoRepairParams) o; return options.equals(cp.options); } diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index e1dc51b8038f..ffeb4580eba5 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -45,7 +45,7 @@ import org.apache.cassandra.db.rows.*; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import 
org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.schema.ColumnMetadata.ClusteringOrder; import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; @@ -598,8 +598,8 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) .add("extensions", params.extensions) - .add("repair_full", params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).asMap()) - .add("repair_incremental", params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).asMap()); + .add("repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ @@ -1090,8 +1090,8 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .unifiedRepairFull(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) - .unifiedRepairIncremental(UnifiedRepairParams.fromMap(UnifiedRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))); + .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) + .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index 209007868132..f9c023e33b32 100644 --- 
a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -95,11 +95,11 @@ private SystemDistributedKeyspace() public static final String PARTITION_DENYLIST_TABLE = "partition_denylist"; - public static final String UNIFIED_REPAIR_HISTORY = "unified_repair_history"; + public static final String AUTO_REPAIR_HISTORY = "auto_repair_history"; - public static final String UNIFIED_REPAIR_PRIORITY = "unified_repair_priority"; + public static final String AUTO_REPAIR_PRIORITY = "auto_repair_priority"; - public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE, UNIFIED_REPAIR_HISTORY, UNIFIED_REPAIR_PRIORITY); + public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE, AUTO_REPAIR_HISTORY, AUTO_REPAIR_PRIORITY); public static final String REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + "keyspace_name text," @@ -162,27 +162,27 @@ private SystemDistributedKeyspace() private static final TableMetadata PartitionDenylistTable = parse(PARTITION_DENYLIST_TABLE, "Partition keys which have been denied access", PARTITION_DENYLIST_CQL).build(); - public static final String UNIFIED_REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" - + "host_id uuid," - + "repair_type text," - + "repair_turn text," - + "repair_start_ts timestamp," - + "repair_finish_ts timestamp," - + "delete_hosts set," - + "delete_hosts_update_time timestamp," - + "force_repair boolean," - + "PRIMARY KEY (repair_type, host_id))"; + public static final String AUTO_REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "host_id uuid," + + "repair_type text," + + "repair_turn text," + + "repair_start_ts timestamp," + + "repair_finish_ts timestamp," + + "delete_hosts set," + + "delete_hosts_update_time timestamp," + + "force_repair boolean," + + "PRIMARY KEY 
(repair_type, host_id))"; - private static final TableMetadata UnifiedRepairHistoryTable = - parse(UNIFIED_REPAIR_HISTORY, "Unified repair history for each node", UNIFIED_REPAIR_HISTORY_CQL).build(); + private static final TableMetadata AutoRepairHistoryTable = + parse(AUTO_REPAIR_HISTORY, "Auto repair history for each node", AUTO_REPAIR_HISTORY_CQL).build(); - public static final String UNIFIED_REPAIR_PRIORITY_CQL = "CREATE TABLE IF NOT EXISTS %s (" - + "repair_type text," - + "repair_priority set," - + "PRIMARY KEY (repair_type))"; + public static final String AUTO_REPAIR_PRIORITY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "repair_type text," + + "repair_priority set," + + "PRIMARY KEY (repair_type))"; - private static final TableMetadata UnifiedRepairPriorityTable = - parse(UNIFIED_REPAIR_PRIORITY, "Unified repair priority for each group", UNIFIED_REPAIR_PRIORITY_CQL).build(); + private static final TableMetadata AutoRepairPriorityTable = + parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", AUTO_REPAIR_PRIORITY_CQL).build(); private static TableMetadata.Builder parse(String table, String description, String cql) @@ -197,7 +197,7 @@ public static KeyspaceMetadata metadata() { return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(Math.max(DEFAULT_RF, DatabaseDescriptor.getDefaultKeyspaceRF())), - Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, UnifiedRepairHistoryTable, UnifiedRepairPriorityTable)); + Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, AutoRepairHistoryTable, AutoRepairPriorityTable)); } public static void startParentRepair(TimeUUID parent_id, String keyspaceName, String[] cfnames, RepairOption options) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 8d432667408e..2c3cfe4daf28 100644 --- 
a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -39,7 +39,7 @@ import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -137,7 +137,7 @@ public String toString() public final TransactionalMigrationFromMode transactionalMigrationFrom; public final boolean pendingDrop; - public final Map unifiedRepair; + public final Map automatedRepair; private TableParams(Builder builder) { @@ -167,11 +167,11 @@ private TableParams(Builder builder) transactionalMigrationFrom = builder.transactionalMigrationFrom; pendingDrop = builder.pendingDrop; checkNotNull(transactionalMigrationFrom); - unifiedRepair = new EnumMap(UnifiedRepairConfig.RepairType.class) + automatedRepair = new EnumMap(AutoRepairConfig.RepairType.class) { { - put(UnifiedRepairConfig.RepairType.full, builder.unifiedRepairFull); - put(UnifiedRepairConfig.RepairType.incremental, builder.unifiedRepairIncremental); + put(AutoRepairConfig.RepairType.full, builder.automatedRepairFull); + put(AutoRepairConfig.RepairType.incremental, builder.automatedRepairIncremental); } }; } @@ -206,8 +206,8 @@ public static Builder builder(TableParams params) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) .pendingDrop(params.pendingDrop) - .unifiedRepairFull(params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full)) - .unifiedRepairIncremental(params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental)) 
+ .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.full)) + .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.incremental)) ; } @@ -267,7 +267,7 @@ public void validate() if (transactionalMode.isTestMode() && !CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getBoolean()) fail("Transactional mode " + transactionalMode + " can't be used if " + CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getKey() + " is not set"); - for (Map.Entry entry : unifiedRepair.entrySet()) + for (Map.Entry entry : automatedRepair.entrySet()) { entry.getValue().validate(); } @@ -312,7 +312,7 @@ public boolean equals(Object o) && transactionalMode == p.transactionalMode && transactionalMigrationFrom == p.transactionalMigrationFrom && pendingDrop == p.pendingDrop - && unifiedRepair.equals(p.unifiedRepair); + && automatedRepair.equals(p.automatedRepair); } @Override @@ -341,7 +341,7 @@ public int hashCode() transactionalMode, transactionalMigrationFrom, pendingDrop, - unifiedRepair); + automatedRepair); } @Override @@ -372,8 +372,8 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) - .add(Option.REPAIR_FULL.toString(), unifiedRepair.get(UnifiedRepairConfig.RepairType.full)) - .add(Option.REPAIR_INCREMENTAL.toString(), unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental)) + .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) + .add(Option.REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.incremental)) .toString(); } @@ -436,9 +436,9 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) .newLine() - .append("AND repair_full = 
").append(unifiedRepair.get(UnifiedRepairConfig.RepairType.full).asMap()) + .append("AND repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) .newLine() - .append("AND repair_incremental = ").append(unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).asMap()); + .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); } public static final class Builder @@ -467,8 +467,8 @@ public static final class Builder public TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; public boolean pendingDrop = false; - private UnifiedRepairParams unifiedRepairFull = new UnifiedRepairParams(UnifiedRepairConfig.RepairType.full); - private UnifiedRepairParams unifiedRepairIncremental = new UnifiedRepairParams(UnifiedRepairConfig.RepairType.incremental); + private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.full); + private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.incremental); public Builder() { @@ -617,15 +617,15 @@ public Builder pendingDrop(boolean pendingDrop) return this; } - public Builder unifiedRepairFull(UnifiedRepairParams val) + public Builder automatedRepairFull(AutoRepairParams val) { - unifiedRepairFull = val; + automatedRepairFull = val; return this; } - public Builder unifiedRepairIncremental(UnifiedRepairParams val) + public Builder automatedRepairIncremental(AutoRepairParams val) { - unifiedRepairIncremental = val; + automatedRepairIncremental = val; return this; } } diff --git a/src/java/org/apache/cassandra/service/UnifiedRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java similarity index 72% rename from src/java/org/apache/cassandra/service/UnifiedRepairService.java rename to src/java/org/apache/cassandra/service/AutoRepairService.java index 489dd6873011..7a8342c2a7a4 100644 --- 
a/src/java/org/apache/cassandra/service/UnifiedRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -20,9 +20,9 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.utils.MBeanWrapper; import java.util.HashSet; @@ -32,23 +32,23 @@ import com.google.common.annotations.VisibleForTesting; -public class UnifiedRepairService implements UnifiedRepairServiceMBean +public class AutoRepairService implements AutoRepairServiceMBean { - public static final String MBEAN_NAME = "org.apache.cassandra.db:type=UnifiedRepairService"; + public static final String MBEAN_NAME = "org.apache.cassandra.db:type=AutoRepairService"; @VisibleForTesting - protected UnifiedRepairConfig config; + protected AutoRepairConfig config; - public static final UnifiedRepairService instance = new UnifiedRepairService(); + public static final AutoRepairService instance = new AutoRepairService(); @VisibleForTesting - protected UnifiedRepairService() + protected AutoRepairService() { } public static void setup() { - instance.config = DatabaseDescriptor.getUnifiedRepairConfig(); + instance.config = DatabaseDescriptor.getAutoRepairConfig(); } static @@ -58,8 +58,8 @@ public static void setup() public void checkCanRun(RepairType repairType) { - if (!config.isUnifiedRepairSchedulingEnabled()) - throw new ConfigurationException("Unified-repair scheduller is disabled."); + if 
(!config.isAutoRepairSchedulingEnabled()) + throw new ConfigurationException("Auto-repair scheduller is disabled."); if (repairType != RepairType.incremental) return; @@ -72,16 +72,16 @@ public void checkCanRun(RepairType repairType) } @Override - public UnifiedRepairConfig getUnifiedRepairConfig() + public AutoRepairConfig getAutoRepairConfig() { return config; } @Override - public void setUnifiedRepairEnabled(RepairType repairType, boolean enabled) + public void setAutoRepairEnabled(RepairType repairType, boolean enabled) { checkCanRun(repairType); - config.setUnifiedRepairEnabled(repairType, enabled); + config.setAutoRepairEnabled(repairType, enabled); } @Override @@ -93,18 +93,18 @@ public void setRepairThreads(RepairType repairType, int repairThreads) @Override public void setRepairPriorityForHosts(RepairType repairType, Set hosts) { - UnifiedRepairUtils.addPriorityHosts(repairType, hosts); + AutoRepairUtils.addPriorityHosts(repairType, hosts); } @Override public Set getRepairHostPriority(RepairType repairType) { - return UnifiedRepairUtils.getPriorityHosts(repairType); + return AutoRepairUtils.getPriorityHosts(repairType); } @Override public void setForceRepairForHosts(RepairType repairType, Set hosts) { - UnifiedRepairUtils.setForceRepair(repairType, hosts); + AutoRepairUtils.setForceRepair(repairType, hosts); } @Override @@ -125,19 +125,19 @@ public void startScheduler() config.startScheduler(); } - public void setUnifiedRepairHistoryClearDeleteHostsBufferDuration(String duration) + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) { - config.setUnifiedRepairHistoryClearDeleteHostsBufferInterval(duration); + config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); } @Override - public void setUnifiedRepairMaxRetriesCount(int retries) + public void setAutoRepairMaxRetriesCount(int retries) { config.setRepairMaxRetries(retries); } @Override - public void setUnifiedRepairRetryBackoff(String interval) + public void 
setAutoRepairRetryBackoff(String interval) { config.setRepairRetryBackoff(interval); } @@ -149,9 +149,9 @@ public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssta } @Override - public void setUnifiedRepairTableMaxRepairTime(RepairType repairType, String unifiedRepairTableMaxRepairTime) + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) { - config.setUnifiedRepairTableMaxRepairTime(repairType, unifiedRepairTableMaxRepairTime); + config.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); } @Override @@ -192,12 +192,12 @@ public void setRepairSessionTimeout(RepairType repairType, String timeout) public Set getOnGoingRepairHostIds(RepairType rType) { Set hostIds = new HashSet<>(); - List histories = UnifiedRepairUtils.getUnifiedRepairHistory(rType); + List histories = AutoRepairUtils.getAutoRepairHistory(rType); if (histories == null) { return hostIds; } - UnifiedRepairUtils.CurrentRepairStatus currentRepairStatus = new UnifiedRepairUtils.CurrentRepairStatus(histories, UnifiedRepairUtils.getPriorityHostIds(rType)); + AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(rType)); for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) { hostIds.add(id.toString()); diff --git a/src/java/org/apache/cassandra/service/UnifiedRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java similarity index 74% rename from src/java/org/apache/cassandra/service/UnifiedRepairServiceMBean.java rename to src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index ec5a71bdcddd..121c9a480303 100644 --- a/src/java/org/apache/cassandra/service/UnifiedRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -18,17 +18,17 @@ package org.apache.cassandra.service; import 
org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import java.util.Set; -public interface UnifiedRepairServiceMBean +public interface AutoRepairServiceMBean { /** - * Enable or disable unified-repair for a given repair type + * Enable or disable auto-repair for a given repair type */ - public void setUnifiedRepairEnabled(RepairType repairType, boolean enabled); + public void setAutoRepairEnabled(RepairType repairType, boolean enabled); public void setRepairThreads(RepairType repairType, int repairThreads); @@ -44,15 +44,15 @@ public interface UnifiedRepairServiceMBean void startScheduler(); - public void setUnifiedRepairHistoryClearDeleteHostsBufferDuration(String duration); + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); - public void setUnifiedRepairMaxRetriesCount(int retries); + public void setAutoRepairMaxRetriesCount(int retries); - public void setUnifiedRepairRetryBackoff(String interval); + public void setAutoRepairRetryBackoff(String interval); public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); - public void setUnifiedRepairTableMaxRepairTime(RepairType repairType, String unifiedRepairTableMaxRepairTime); + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); public void setIgnoreDCs(RepairType repairType, Set ignorDCs); @@ -64,7 +64,7 @@ public interface UnifiedRepairServiceMBean public void setMVRepairEnabled(RepairType repairType, boolean enabled); - public UnifiedRepairConfig getUnifiedRepairConfig(); + public AutoRepairConfig getAutoRepairConfig(); public void setRepairSessionTimeout(RepairType repairType, String timeout); diff 
--git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index 90ace416c8cf..69ee2c2c5d67 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -403,7 +403,7 @@ protected void setup() AuditLogManager.instance.initialize(); - StorageService.instance.doUnifiedRepairSetup(); + StorageService.instance.doAutoRepairSetup(); // schedule periodic background compaction task submission. this is simply a backstop against compactions stalling // due to scheduling errors or race conditions diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index afa1202137ef..a635b8f7b7b6 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -73,7 +73,7 @@ import com.google.common.util.concurrent.Uninterruptibles; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepair; +import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1131,15 +1131,15 @@ public void doAuthSetup(boolean async) } } - public void doUnifiedRepairSetup() + public void doAutoRepairSetup() { - UnifiedRepairService.setup(); - if (DatabaseDescriptor.getUnifiedRepairConfig().isUnifiedRepairSchedulingEnabled()) + AutoRepairService.setup(); + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { - logger.info("Enable unified-repair scheduling"); - UnifiedRepair.instance.setup(); + logger.info("Enable auto-repair scheduling"); + AutoRepair.instance.setup(); } - logger.info("UnifiedRepair setup complete!"); + logger.info("AutoRepair setup complete!"); } diff --git 
a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 78ee739dd452..19d6cb50edae 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -40,7 +40,7 @@ import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordService; @@ -222,7 +222,7 @@ public SequenceState executeNext() return halted(); } // this node might have just bootstrapped; check if we should run repair immediately - UnifiedRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java index f074e285df13..f3a3a8ff4577 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java @@ -44,7 +44,7 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -219,7 +219,7 @@ public SequenceState 
executeNext() return halted(); } // this node might have just bootstrapped; check if we should run repair immediately - UnifiedRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index ab4d8ffd68cc..1c55d84ca244 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -29,7 +29,7 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -87,7 +87,7 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s throw new IllegalStateException("Could not finish join for during replacement"); } // this node might have just bootstrapped; check if we should run repair immediately - UnifiedRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } if (finishJoiningRing) diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index e495ef781c38..5417c127e5e0 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -113,10 +113,10 @@ import org.apache.cassandra.metrics.ThreadPoolMetrics; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.MessagingServiceMBean; -import 
org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.ActiveRepairServiceMBean; -import org.apache.cassandra.service.UnifiedRepairService; -import org.apache.cassandra.service.UnifiedRepairServiceMBean; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.AutoRepairServiceMBean; import org.apache.cassandra.service.CacheService; import org.apache.cassandra.service.CacheServiceMBean; import org.apache.cassandra.service.snapshot.SnapshotManagerMBean; @@ -184,7 +184,7 @@ public class NodeProbe implements AutoCloseable protected CIDRGroupsMappingManagerMBean cmbProxy; protected PermissionsCacheMBean pcProxy; protected RolesCacheMBean rcProxy; - protected UnifiedRepairServiceMBean unifiedRepairProxy; + protected AutoRepairServiceMBean autoRepairProxy; protected Output output; private boolean failed; @@ -331,8 +331,8 @@ protected void connect() throws IOException name = new ObjectName(CIDRFilteringMetricsTable.MBEAN_NAME); cfmProxy = JMX.newMBeanProxy(mbeanServerConn, name, CIDRFilteringMetricsTableMBean.class); - name = new ObjectName(UnifiedRepairService.MBEAN_NAME); - unifiedRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name, UnifiedRepairServiceMBean.class); + name = new ObjectName(AutoRepairService.MBEAN_NAME); + autoRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name, AutoRepairServiceMBean.class); } catch (MalformedObjectNameException e) { @@ -2564,95 +2564,95 @@ public void abortBootstrap(String nodeId, String endpoint) ssProxy.abortBootstrap(nodeId, endpoint); } - public UnifiedRepairConfig getUnifiedRepairConfig() { - return unifiedRepairProxy.getUnifiedRepairConfig(); + public AutoRepairConfig getAutoRepairConfig() { + return autoRepairProxy.getAutoRepairConfig(); } - public void setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType repairType, boolean enabled) + public void 
setAutoRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) { - unifiedRepairProxy.setUnifiedRepairEnabled(repairType, enabled); + autoRepairProxy.setAutoRepairEnabled(repairType, enabled); } - public void setRepairThreads(UnifiedRepairConfig.RepairType repairType, int repairThreads) + public void setRepairThreads(AutoRepairConfig.RepairType repairType, int repairThreads) { - unifiedRepairProxy.setRepairThreads(repairType, repairThreads); + autoRepairProxy.setRepairThreads(repairType, repairThreads); } - public void setRepairPriorityForHosts(UnifiedRepairConfig.RepairType repairType, Set hosts) + public void setRepairPriorityForHosts(AutoRepairConfig.RepairType repairType, Set hosts) { - unifiedRepairProxy.setRepairPriorityForHosts(repairType, hosts); + autoRepairProxy.setRepairPriorityForHosts(repairType, hosts); } - public Set getRepairPriorityForHosts(UnifiedRepairConfig.RepairType repairType) + public Set getRepairPriorityForHosts(AutoRepairConfig.RepairType repairType) { - return unifiedRepairProxy.getRepairHostPriority(repairType); + return autoRepairProxy.getRepairHostPriority(repairType); } - public void setForceRepairForHosts(UnifiedRepairConfig.RepairType repairType, Set hosts){ - unifiedRepairProxy.setForceRepairForHosts(repairType, hosts); + public void setForceRepairForHosts(AutoRepairConfig.RepairType repairType, Set hosts){ + autoRepairProxy.setForceRepairForHosts(repairType, hosts); } - public void setRepairSubRangeNum(UnifiedRepairConfig.RepairType repairType, int repairSubRanges) + public void setRepairSubRangeNum(AutoRepairConfig.RepairType repairType, int repairSubRanges) { - unifiedRepairProxy.setRepairSubRangeNum(repairType, repairSubRanges); + autoRepairProxy.setRepairSubRangeNum(repairType, repairSubRanges); } - public void setRepairMinInterval(UnifiedRepairConfig.RepairType repairType, String minRepairInterval) + public void setRepairMinInterval(AutoRepairConfig.RepairType repairType, String minRepairInterval) { - 
unifiedRepairProxy.setRepairMinInterval(repairType, minRepairInterval); + autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); } - public void setUnifiedRepairHistoryClearDeleteHostsBufferDuration(String duration) + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) { - unifiedRepairProxy.setUnifiedRepairHistoryClearDeleteHostsBufferDuration(duration); + autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); } public void startScheduler() { - unifiedRepairProxy.startScheduler(); + autoRepairProxy.startScheduler(); } - public void setUnifiedRepairMaxRetriesCount(int retries) + public void setAutoRepairMaxRetriesCount(int retries) { - unifiedRepairProxy.setUnifiedRepairMaxRetriesCount(retries); + autoRepairProxy.setAutoRepairMaxRetriesCount(retries); } - public void setUnifiedRepairRetryBackoff(String interval) + public void setAutoRepairRetryBackoff(String interval) { - unifiedRepairProxy.setUnifiedRepairRetryBackoff(interval); + autoRepairProxy.setAutoRepairRetryBackoff(interval); } - public void setRepairSSTableCountHigherThreshold(UnifiedRepairConfig.RepairType repairType, int ssTableHigherThreshold) + public void setRepairSSTableCountHigherThreshold(AutoRepairConfig.RepairType repairType, int ssTableHigherThreshold) { - unifiedRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); + autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); } - public void setUnifiedRepairTableMaxRepairTime(UnifiedRepairConfig.RepairType repairType, String unifiedRepairTableMaxRepairTime) + public void setAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType repairType, String autoRepairTableMaxRepairTime) { - unifiedRepairProxy.setUnifiedRepairTableMaxRepairTime(repairType, unifiedRepairTableMaxRepairTime); + autoRepairProxy.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); } - public void 
setUnifiedRepairIgnoreDCs(UnifiedRepairConfig.RepairType repairType, Set ignoreDCs) + public void setAutoRepairIgnoreDCs(AutoRepairConfig.RepairType repairType, Set ignoreDCs) { - unifiedRepairProxy.setIgnoreDCs(repairType, ignoreDCs); + autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); } - public void setParallelRepairPercentage(UnifiedRepairConfig.RepairType repairType, int percentage) { - unifiedRepairProxy.setParallelRepairPercentage(repairType, percentage); + public void setParallelRepairPercentage(AutoRepairConfig.RepairType repairType, int percentage) { + autoRepairProxy.setParallelRepairPercentage(repairType, percentage); } - public void setParallelRepairCount(UnifiedRepairConfig.RepairType repairType, int count) { - unifiedRepairProxy.setParallelRepairCount(repairType, count); + public void setParallelRepairCount(AutoRepairConfig.RepairType repairType, int count) { + autoRepairProxy.setParallelRepairCount(repairType, count); } - public void setPrimaryTokenRangeOnly(UnifiedRepairConfig.RepairType repairType, boolean primaryTokenRangeOnly) + public void setPrimaryTokenRangeOnly(AutoRepairConfig.RepairType repairType, boolean primaryTokenRangeOnly) { - unifiedRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); + autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); } - public void setMVRepairEnabled(UnifiedRepairConfig.RepairType repairType, boolean enabled) + public void setMVRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) { - unifiedRepairProxy.setMVRepairEnabled(repairType, enabled); + autoRepairProxy.setMVRepairEnabled(repairType, enabled); } public List mutateSSTableRepairedState(boolean repair, boolean preview, String keyspace, List tables) throws InvalidRequestException @@ -2664,14 +2664,14 @@ public List getTablesForKeyspace(String keyspace) { return ssProxy.getTablesForKeyspace(keyspace); } - public void setRepairSessionTimeout(UnifiedRepairConfig.RepairType repairType, String 
timeout) + public void setRepairSessionTimeout(AutoRepairConfig.RepairType repairType, String timeout) { - unifiedRepairProxy.setRepairSessionTimeout(repairType, timeout); + autoRepairProxy.setRepairSessionTimeout(repairType, timeout); } - public Set getOnGoingRepairHostIds(UnifiedRepairConfig.RepairType type) + public Set getOnGoingRepairHostIds(AutoRepairConfig.RepairType type) { - return unifiedRepairProxy.getOnGoingRepairHostIds(type); + return autoRepairProxy.getOnGoingRepairHostIds(type); } } diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 42a23ffee213..3056b03f5fbc 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -93,6 +93,7 @@ public int execute(String... args) List> commands = newArrayList( AbortBootstrap.class, AlterTopology.class, + AutoRepairStatus.class, Assassinate.class, CassHelp.class, CIDRFilteringStats.class, @@ -133,6 +134,7 @@ public int execute(String... args) GcStats.class, GetAuditLog.class, GetAuthCacheConfig.class, + GetAutoRepairConfig.class, GetBatchlogReplayTrottle.class, GetCIDRGroupsOfIP.class, GetColumnIndexSize.class, @@ -153,7 +155,6 @@ public int execute(String... args) GetStreamThroughput.class, GetTimeout.class, GetTraceProbability.class, - GetUnifiedRepairConfig.class, GossipInfo.class, Import.class, Info.class, @@ -198,6 +199,7 @@ public int execute(String... args) Ring.class, Scrub.class, SetAuthCacheConfig.class, + SetAutoRepairConfig.class, SetBatchlogReplayThrottle.class, SetCacheCapacity.class, SetCacheKeysToSave.class, @@ -216,7 +218,6 @@ public int execute(String... args) SetStreamThroughput.class, SetTimeout.class, SetTraceProbability.class, - SetUnifiedRepairConfig.class, Sjk.class, Snapshot.class, SSTableRepairedSet.class, @@ -233,7 +234,6 @@ public int execute(String... 
args) TopPartitions.class, TpStats.class, TruncateHints.class, - UnifiedRepairStatus.class, UpdateCIDRGroup.class, UpgradeSSTable.class, Verify.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/UnifiedRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java similarity index 82% rename from src/java/org/apache/cassandra/tools/nodetool/UnifiedRepairStatus.java rename to src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java index b40fe887e5d8..7b96102c6698 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/UnifiedRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -25,19 +25,19 @@ import io.airlift.airline.Command; import io.airlift.airline.Option; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool; import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; import static com.google.common.base.Preconditions.checkArgument; -@Command(name = "unifiedrepairstatus", description = "Print unifiedrepair status") -public class UnifiedRepairStatus extends NodeTool.NodeToolCmd +@Command(name = "autorepairstatus", description = "Print autorepair status") +public class AutoRepairStatus extends NodeTool.NodeToolCmd { @VisibleForTesting @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") - protected UnifiedRepairConfig.RepairType repairType; + protected AutoRepairConfig.RepairType repairType; @Override public void execute(NodeProbe probe) @@ -45,10 +45,10 @@ public void execute(NodeProbe probe) checkArgument(repairType != null, "--repair-type is required."); PrintStream out = probe.output().out; - UnifiedRepairConfig config = probe.getUnifiedRepairConfig(); - if (config == null || !config.isUnifiedRepairSchedulingEnabled()) + AutoRepairConfig config = 
probe.getAutoRepairConfig(); + if (config == null || !config.isAutoRepairSchedulingEnabled()) { - out.println("Unified-repair is not enabled"); + out.println("Auto-repair is not enabled"); return; } diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetUnifiedRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java similarity index 82% rename from src/java/org/apache/cassandra/tools/nodetool/GetUnifiedRepairConfig.java rename to src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 336063eb2405..48f8d54de3f6 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetUnifiedRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -20,15 +20,15 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import io.airlift.airline.Command; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; import java.io.PrintStream; -@Command(name = "getunifiedrepairconfig", description = "Print unifiedrepair configurations") -public class GetUnifiedRepairConfig extends NodeToolCmd +@Command(name = "getautorepairconfig", description = "Print autorepair configurations") +public class GetAutoRepairConfig extends NodeToolCmd { @VisibleForTesting protected static PrintStream out = System.out; @@ -36,17 +36,17 @@ public class GetUnifiedRepairConfig extends NodeToolCmd @Override public void execute(NodeProbe probe) { - UnifiedRepairConfig config = probe.getUnifiedRepairConfig(); - if (config == null || !config.isUnifiedRepairSchedulingEnabled()) + AutoRepairConfig config = probe.getAutoRepairConfig(); + if (config 
== null || !config.isAutoRepairSchedulingEnabled()) { - out.println("Unified-repair is not enabled"); + out.println("Auto-repair is not enabled"); return; } StringBuilder sb = new StringBuilder(); sb.append("repair scheduler configuration:"); sb.append("\n\trepair eligibility check interval: " + config.getRepairCheckInterval()); - sb.append("\n\tTTL for repair history for dead nodes: " + config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval()); + sb.append("\n\tTTL for repair history for dead nodes: " + config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); sb.append("\n\tmax retries for repair: " + config.getRepairMaxRetries()); sb.append("\n\tretry backoff: " + config.getRepairRetryBackoff()); for (RepairType repairType : RepairType.values()) @@ -57,17 +57,17 @@ public void execute(NodeProbe probe) out.println(sb); } - private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, UnifiedRepairConfig config) + private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, AutoRepairConfig config) { StringBuilder sb = new StringBuilder(); sb.append("\nconfiguration for repair type: " + repairType); - sb.append("\n\tenabled: " + config.isUnifiedRepairEnabled(repairType)); + sb.append("\n\tenabled: " + config.isAutoRepairEnabled(repairType)); sb.append("\n\tminimum repair interval: " + config.getRepairMinInterval(repairType)); sb.append("\n\trepair threads: " + config.getRepairThreads(repairType)); sb.append("\n\tnumber of repair subranges: " + config.getRepairSubRangeNum(repairType)); sb.append("\n\tpriority hosts: " + Joiner.on(',').skipNulls().join(probe.getRepairPriorityForHosts(repairType))); sb.append("\n\tsstable count higher threshold: " + config.getRepairSSTableCountHigherThreshold(repairType)); - sb.append("\n\ttable max repair time in sec: " + config.getUnifiedRepairTableMaxRepairTime(repairType)); + sb.append("\n\ttable max repair time in sec: " + config.getAutoRepairTableMaxRepairTime(repairType)); 
sb.append("\n\tignore datacenters: " + Joiner.on(',').skipNulls().join(config.getIgnoreDCs(repairType))); sb.append("\n\trepair primary token-range: " + config.getRepairPrimaryTokenRangeOnly(repairType)); sb.append("\n\tnumber of parallel repairs within group: " + config.getParallelRepairCount(repairType)); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java similarity index 83% rename from src/java/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfig.java rename to src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index fe123c3de1ac..2929c944442a 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -24,7 +24,7 @@ import io.airlift.airline.Command; import io.airlift.airline.Option; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; @@ -37,12 +37,12 @@ import static com.google.common.base.Preconditions.checkArgument; -@Command(name = "setunifiedrepairconfig", description = "sets the unifiedrepair configuration") -public class SetUnifiedRepairConfig extends NodeToolCmd +@Command(name = "setautorepairconfig", description = "sets the autorepair configuration") +public class SetAutoRepairConfig extends NodeToolCmd { @VisibleForTesting - @Arguments(title = " ", usage = " ", - description = "unifiedrepair param and value.\nPossible unifiedrepair parameters are as following: " + + @Arguments(title = " ", usage = " ", + description = "autorepair param and value.\nPossible autorepair parameters are as following: " + 
"[start_scheduler|number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + @@ -60,13 +60,13 @@ public class SetUnifiedRepairConfig extends NodeToolCmd @Override public void execute(NodeProbe probe) { - checkArgument(args.size() == 2, "setunifiedrepairconfig requires param-type, and value args."); + checkArgument(args.size() == 2, "setautorepairconfig requires param-type, and value args."); String paramType = args.get(0); String paramVal = args.get(1); - if (!probe.getUnifiedRepairConfig().isUnifiedRepairSchedulingEnabled() && !paramType.equalsIgnoreCase("start_scheduler")) + if (!probe.getAutoRepairConfig().isAutoRepairSchedulingEnabled() && !paramType.equalsIgnoreCase("start_scheduler")) { - out.println("Unified-repair is not enabled"); + out.println("Auto-repair is not enabled"); return; } @@ -80,13 +80,13 @@ public void execute(NodeProbe probe) } return; case "history_clear_delete_hosts_buffer_interval": - probe.setUnifiedRepairHistoryClearDeleteHostsBufferDuration(paramVal); + probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); return; case "repair_max_retries": - probe.setUnifiedRepairMaxRetriesCount(Integer.parseInt(paramVal)); + probe.setAutoRepairMaxRetriesCount(Integer.parseInt(paramVal)); return; case "repair_retry_backoff": - probe.setUnifiedRepairRetryBackoff(paramVal); + probe.setAutoRepairRetryBackoff(paramVal); return; default: // proceed to options that require --repair-type option @@ -99,7 +99,7 @@ public void execute(NodeProbe probe) switch (paramType) { case "enabled": - probe.setUnifiedRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + probe.setAutoRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); break; case "number_of_repair_threads": probe.setRepairThreads(repairType, Integer.parseInt(paramVal)); @@ -114,7 +114,7 @@ 
public void execute(NodeProbe probe) probe.setRepairSSTableCountHigherThreshold(repairType, Integer.parseInt(paramVal)); break; case "table_max_repair_time": - probe.setUnifiedRepairTableMaxRepairTime(repairType, paramVal); + probe.setAutoRepairTableMaxRepairTime(repairType, paramVal); break; case "priority_hosts": hosts = retrieveHosts(paramVal); @@ -136,7 +136,7 @@ public void execute(NodeProbe probe) { ignoreDCs.add(dc); } - probe.setUnifiedRepairIgnoreDCs(repairType, ignoreDCs); + probe.setAutoRepairIgnoreDCs(repairType, ignoreDCs); break; case "repair_primary_token_range_only": probe.setPrimaryTokenRangeOnly(repairType, Boolean.parseBoolean(paramVal)); diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 14a34515dfa5..2be783fe4a9d 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -68,7 +68,7 @@ import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -732,11 +732,11 @@ public static AbstractCryptoProvider newCryptoProvider(String className, Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); // first attempt to initialize with Map arguments. - return (IUnifiedRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(Map.class).newInstance(parameters); + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(Map.class).newInstance(parameters); } catch (NoSuchMethodException nsme) { // fall back on no argument constructor. 
- return (IUnifiedRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); } } catch (Exception ex) { - throw new ConfigurationException("Unable to create instance of IUnifiedRepairTokenRangeSplitter for " + className, ex); + throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter for " + className, ex); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/UnifiedRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java similarity index 83% rename from test/distributed/org/apache/cassandra/distributed/test/repair/UnifiedRepairSchedulerTest.java rename to test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 8460ec119cee..9b583f006507 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/UnifiedRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -35,14 +35,14 @@ import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.test.TestBaseImpl; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepair; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.junit.Assert.assertEquals; -public class UnifiedRepairSchedulerTest extends TestBaseImpl +public class AutoRepairSchedulerTest extends TestBaseImpl { private static Cluster cluster; @@ -57,25 
+57,25 @@ public static void init() throws IOException sdf = new SimpleDateFormat(pattern); sdf.setLenient(false); cluster = Cluster.build(3).withConfig(config -> config - .set("unified_repair", + .set("auto_repair", ImmutableMap.of( "repair_type_overrides", - ImmutableMap.of(UnifiedRepairConfig.RepairType.full.toString(), - ImmutableMap.of( + ImmutableMap.of(AutoRepairConfig.RepairType.full.toString(), + ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", "parallel_repair_count", "1", "parallel_repair_percentage", "0", "min_repair_interval", "1s"), - UnifiedRepairConfig.RepairType.incremental.toString(), - ImmutableMap.of( + AutoRepairConfig.RepairType.incremental.toString(), + ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", "parallel_repair_count", "1", "parallel_repair_percentage", "0", "min_repair_interval", "1s")))) - .set("unified_repair.enabled", "true") - .set("unified_repair.repair_check_interval", "10s")).start(); + .set("auto_repair.enabled", "true") + .set("auto_repair.repair_check_interval", "10s")).start(); cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); @@ -85,7 +85,7 @@ public static void init() throws IOException public void testScheduler() throws ParseException { // ensure there was no history of previous repair runs through the scheduler - Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY), ConsistencyLevel.QUORUM); + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY), ConsistencyLevel.QUORUM); assertEquals(0, rows.length); cluster.forEach(i -> i.runOnInstance(() -> { @@ -93,9 +93,9 @@ public void testScheduler() throws ParseException { DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - UnifiedRepairService.instance.setup(); + AutoRepairService.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(false); - UnifiedRepair.instance.setup(); + AutoRepair.instance.setup(); } catch (Exception e) { @@ -105,13 +105,13 @@ public void testScheduler() throws ParseException // wait for a couple of minutes for repair to go through on all three nodes Uninterruptibles.sleepUninterruptibly(2, TimeUnit.MINUTES); - validate(UnifiedRepairConfig.RepairType.full.toString()); - validate(UnifiedRepairConfig.RepairType.incremental.toString()); + validate(AutoRepairConfig.RepairType.full.toString()); + validate(AutoRepairConfig.RepairType.incremental.toString()); } private void validate(String repairType) throws ParseException { - Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); assertEquals(3, rows.length); for (int node = 0; node < rows.length; node++) { diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 7177cad467c1..4b2486255adc 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -64,7 +64,7 @@ import 
org.apache.cassandra.io.util.File; import org.apache.cassandra.config.Config; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.slf4j.Logger; @@ -1369,11 +1369,11 @@ public static Map listSnapshots(ColumnFamilyStore cfs) return tagSnapshotsMap; } - // Replaces the global unified-repair config with a new config where unified-repair schedulling is enabled/disabled - public static void setUnifiedRepairEnabled(boolean enabled) throws Exception + // Replaces the global auto-repair config with a new config where auto-repair schedulling is enabled/disabled + public static void setAutoRepairEnabled(boolean enabled) throws Exception { Config config = DatabaseDescriptor.getRawConfig(); - config.unified_repair = new UnifiedRepairConfig(enabled); + config.auto_repair = new AutoRepairConfig(enabled); Field configField = DatabaseDescriptor.class.getDeclaredField("conf"); configField.setAccessible(true); configField.set(null, config); diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 2c96f2c4602b..d62e88091635 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -114,14 +114,14 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$CorruptedTombstoneStrategy", "org.apache.cassandra.config.Config$BatchlogEndpointStrategy", "org.apache.cassandra.config.Config$TombstonesMetricGranularity", - "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig", - "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig$Options", - "org.apache.cassandra.repair.unifiedrepair.DefaultUnifiedRepairTokenSplitter", - 
"org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter", - "org.apache.cassandra.repair.unifiedrepair.FullRepairState", - "org.apache.cassandra.repair.unifiedrepair.IncrementalRepairState", - "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig$RepairType", - "org.apache.cassandra.repair.unifiedrepair.UnifiedRepairState", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig$Options", + "org.apache.cassandra.repair.autorepair.DefaultAutoRepairTokenSplitter", + "org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter", + "org.apache.cassandra.repair.autorepair.FullRepairState", + "org.apache.cassandra.repair.autorepair.IncrementalRepairState", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig$RepairType", + "org.apache.cassandra.repair.autorepair.AutoRepairState", "org.apache.cassandra.config.DatabaseDescriptor$ByteUnit", "org.apache.cassandra.config.DataRateSpec", "org.apache.cassandra.config.DataRateSpec$DataRateUnit", diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 0f1834d94f10..9a0203c301bd 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -46,7 +46,7 @@ import static org.apache.cassandra.config.YamlConfigurationLoader.SYSTEM_PROPERTY_PREFIX; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -277,7 +277,7 @@ public void fromMapTest() Map encryptionOptions = ImmutableMap.of("cipher_suites", 
Collections.singletonList("FakeCipher"), "optional", false, "enabled", true); - Map unifiedRepairConfig = ImmutableMap.of("enabled", true, + Map autoRepairConfig = ImmutableMap.of("enabled", true, "global_settings", ImmutableMap.of("number_of_repair_threads", 1), "repair_type_overrides", ImmutableMap.of( @@ -291,7 +291,7 @@ public void fromMapTest() .put("internode_socket_send_buffer_size", "5B") .put("internode_socket_receive_buffer_size", "5B") .put("commitlog_sync_group_window_in_ms", "42") - .put("unified_repair", unifiedRepairConfig) + .put("auto_repair", autoRepairConfig) .build(); Config config = YamlConfigurationLoader.fromMap(map, Config.class); @@ -302,9 +302,9 @@ public void fromMapTest() assertEquals(true, config.client_encryption_options.enabled); // Check a nested object assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) - assertEquals(true, config.unified_repair.enabled); - assertEquals(new DurationSpec.IntSecondsBound("6h"), config.unified_repair.getUnifiedRepairTableMaxRepairTime(UnifiedRepairConfig.RepairType.incremental)); - config.unified_repair.setMVRepairEnabled(UnifiedRepairConfig.RepairType.incremental, false); + assertEquals(true, config.auto_repair.enabled); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.incremental)); + config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.incremental, false); } @Test @@ -565,4 +565,4 @@ public static Config load(String path) } return new YamlConfigurationLoader().loadConfig(url); } -} +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfigTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java similarity index 77% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfigTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index e7ce54a1cea5..4f1dd029ef5e 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.EnumMap; import java.util.Objects; @@ -35,7 +35,7 @@ import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.Options; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.Options; import org.apache.cassandra.utils.FBUtilities; import static org.junit.Assert.assertEquals; @@ -44,79 +44,79 @@ import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) -public class UnifiedRepairConfigTest extends CQLTester +public class AutoRepairConfigTest extends CQLTester { - private UnifiedRepairConfig config; + private AutoRepairConfig config; private Set testSet = ImmutableSet.of("dc1"); @Parameterized.Parameter - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; @Parameterized.Parameters public static Object[] repairTypes() { - return UnifiedRepairConfig.RepairType.values(); + return AutoRepairConfig.RepairType.values(); } @Before public void setUp() { - config = new UnifiedRepairConfig(true); + config = new AutoRepairConfig(true); config.repair_type_overrides = null; - UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + 
AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); } @Test - public void unifiedRepairConfigDefaultsAreNotNull() + public void autoRepairConfigDefaultsAreNotNull() { - UnifiedRepairConfig config = new UnifiedRepairConfig(); + AutoRepairConfig config = new AutoRepairConfig(); assertNotNull(config.global_settings); } @Test - public void unifiedRepairConfigRepairTypesAreNotNull() + public void autoRepairConfigRepairTypesAreNotNull() { - UnifiedRepairConfig config = new UnifiedRepairConfig(); - for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) + AutoRepairConfig config = new AutoRepairConfig(); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { assertNotNull(config.repair_type_overrides.get(repairType)); } } @Test - public void testIsUnifiedRepairEnabledReturnsTrueWhenRepairIsEnabled() + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsEnabled() { config.global_settings.enabled = true; - assertTrue(config.isUnifiedRepairEnabled(repairType)); + assertTrue(config.isAutoRepairEnabled(repairType)); } @Test - public void testIsUnifiedRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() { - config = new UnifiedRepairConfig(false); + config = new AutoRepairConfig(false); config.global_settings.enabled = true; - assertFalse(config.isUnifiedRepairEnabled(repairType)); + assertFalse(config.isAutoRepairEnabled(repairType)); } @Test - public void testIsUnifiedRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() { config.global_settings.enabled = true; - config.repair_type_overrides = new EnumMap<>(UnifiedRepairConfig.RepairType.class); + config.repair_type_overrides = new EnumMap<>(AutoRepairConfig.RepairType.class); config.repair_type_overrides.put(repairType, new Options()); 
config.repair_type_overrides.get(repairType).enabled = false; - assertFalse(config.isUnifiedRepairEnabled(repairType)); + assertFalse(config.isAutoRepairEnabled(repairType)); } @Test - public void testSetUnifiedRepairEnabledNoMVOrCDC() + public void testSetAutoRepairEnabledNoMVOrCDC() { DatabaseDescriptor.setCDCEnabled(false); DatabaseDescriptor.setMaterializedViewsEnabled(false); - config.setUnifiedRepairEnabled(repairType, true); + config.setAutoRepairEnabled(repairType, true); assertTrue(config.repair_type_overrides.get(repairType).enabled); } @@ -194,19 +194,19 @@ public void testSetRepairMinFrequencyInHours() } @Test - public void testGetUnifiedRepairHistoryClearDeleteHostsBufferInSec() + public void testGetAutoRepairHistoryClearDeleteHostsBufferInSec() { config.history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("5s"); - int result = config.getUnifiedRepairHistoryClearDeleteHostsBufferInterval().toSeconds(); + int result = config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds(); assertEquals(5, result); } @Test - public void testSetUnifiedRepairHistoryClearDeleteHostsBufferInSec() + public void testSetAutoRepairHistoryClearDeleteHostsBufferInSec() { - config.setUnifiedRepairHistoryClearDeleteHostsBufferInterval("5s"); + config.setAutoRepairHistoryClearDeleteHostsBufferInterval("5s"); assert Objects.equals(config.history_clear_delete_hosts_buffer_interval, new DurationSpec.IntSecondsBound("5s")); } @@ -230,19 +230,19 @@ public void testSetRepairSSTableCountHigherThreshold() } @Test - public void testGetUnifiedRepairTableMaxRepairTimeInSec() + public void testGetAutoRepairTableMaxRepairTimeInSec() { config.global_settings.table_max_repair_time = new DurationSpec.IntSecondsBound("5s"); - DurationSpec.IntSecondsBound result = config.getUnifiedRepairTableMaxRepairTime(repairType); + DurationSpec.IntSecondsBound result = config.getAutoRepairTableMaxRepairTime(repairType); assertEquals(5, result.toSeconds()); } @Test - 
public void testSetUnifiedRepairTableMaxRepairTimeInSec() + public void testSetAutoRepairTableMaxRepairTimeInSec() { - config.setUnifiedRepairTableMaxRepairTime(repairType, "5s"); + config.setAutoRepairTableMaxRepairTime(repairType, "5s"); assert config.repair_type_overrides.get(repairType).table_max_repair_time.toSeconds() == 5; } @@ -356,19 +356,19 @@ public void testGetForceRepairNewNode() } @Test - public void testIsUnifiedRepairSchedulingEnabledDefault() + public void testIsAutoRepairSchedulingEnabledDefault() { - config = new UnifiedRepairConfig(); + config = new AutoRepairConfig(); - boolean result = config.isUnifiedRepairSchedulingEnabled(); + boolean result = config.isAutoRepairSchedulingEnabled(); assertFalse(result); } @Test - public void testIsUnifiedRepairSchedulingEnabledTrue() + public void testIsAutoRepairSchedulingEnabledTrue() { - boolean result = config.isUnifiedRepairSchedulingEnabled(); + boolean result = config.isAutoRepairSchedulingEnabled(); assertTrue(result); } @@ -386,16 +386,16 @@ public void testGetDefaultOptionsTokenRangeSplitter() { Options defaultOptions = Options.getDefaultOptions(); - ParameterizedClass expectedDefault = new ParameterizedClass(DefaultUnifiedRepairTokenSplitter.class.getName(), Collections.emptyMap()); + ParameterizedClass expectedDefault = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); assertEquals(expectedDefault, defaultOptions.token_range_splitter); - assertEquals(DefaultUnifiedRepairTokenSplitter.class.getName(), FBUtilities.newUnifiedRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); + assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), FBUtilities.newAutoRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); } @Test(expected = ConfigurationException.class) public void testInvalidTokenRangeSplitter() { - FBUtilities.newUnifiedRepairTokenRangeSplitter(new ParameterizedClass("invalid-class", 
Collections.emptyMap())); + FBUtilities.newAutoRepairTokenRangeSplitter(new ParameterizedClass("invalid-class", Collections.emptyMap())); } @Test diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java similarity index 84% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java index f14f6acefeac..804d0a712b48 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairDefaultTokenSplitterParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; import java.util.Arrays; @@ -37,8 +37,8 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter.RepairAssignment; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -48,7 +48,7 @@ import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) -public class UnifiedRepairDefaultTokenSplitterParameterizedTest +public class AutoRepairDefaultTokenSplitterParameterizedTest { private static final String KEYSPACE = "ks"; private static final String TABLE1 = "tbl1"; @@ -56,12 +56,12 @@ public 
class UnifiedRepairDefaultTokenSplitterParameterizedTest private static final String TABLE3 = "tbl3"; @Parameterized.Parameter() - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + public static Collection repairTypes() { - return Arrays.asList(UnifiedRepairConfig.RepairType.values()); + return Arrays.asList(AutoRepairConfig.RepairType.values()); } @BeforeClass @@ -83,7 +83,7 @@ public static void setupClass() throws Exception ServerTestUtils.registerLocal(tokens); // Ensure that the on-disk format statics are loaded before the test run Version.LATEST.onDiskFormat(); - StorageService.instance.doUnifiedRepairSetup(); + StorageService.instance.doAutoRepairSetup(); SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); @@ -111,7 +111,7 @@ private static void appendExpectedTokens(long left, long right, int numberOfSpli @Test public void testTokenRangesSplitByTable() { - UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairByKeyspace(repairType, false); + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); int totalTokenRanges = 3; Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); @@ -125,9 +125,9 @@ public void testTokenRangesSplitByTable() appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); } - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - List assignments = new DefaultUnifiedRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); + List assignments = new 
DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); @@ -146,7 +146,7 @@ public void testTokenRangesSplitByTable() @Test public void testTokenRangesSplitByKeyspace() { - UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairByKeyspace(repairType, true); + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); int totalTokenRanges = 3; Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); @@ -157,9 +157,9 @@ public void testTokenRangesSplitByKeyspace() appendExpectedTokens(0, 256, numberOfSplits, expectedToken); appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - List assignments = new DefaultUnifiedRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); + List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java similarity index 87% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairKeyspaceTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 748cc88b2a2e..1337cf3dd2d3 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairKeyspaceTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.HashSet; import java.util.Iterator; @@ -29,7 +29,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; -public class UnifiedRepairKeyspaceTest +public class AutoRepairKeyspaceTest { @BeforeClass public static void setupDatabaseDescriptor() @@ -38,7 +38,7 @@ public static void setupDatabaseDescriptor() } @Test - public void testEnsureUnifiedRepairTablesArePresent() + public void testEnsureAutoRepairTablesArePresent() { KeyspaceMetadata keyspaceMetadata = SystemDistributedKeyspace.metadata(); Iterator iter = keyspaceMetadata.tables.iterator(); @@ -48,7 +48,7 @@ public void testEnsureUnifiedRepairTablesArePresent() actualDistributedTablesIter.add(iter.next().name); } - Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY)); - Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY)); + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } } diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java similarity index 52% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairParameterizedTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index b080cede0c9c..c7533c6dba0e 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -16,7 
+16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; import java.util.Arrays; @@ -37,7 +37,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.repair.unifiedrepair.IUnifiedRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; @@ -56,20 +56,20 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.metrics.UnifiedRepairMetricsManager; -import org.apache.cassandra.metrics.UnifiedRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.metrics.AutoRepairMetrics; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; -import static org.apache.cassandra.Util.setUnifiedRepairEnabled; +import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; import static 
org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -82,43 +82,43 @@ import static org.mockito.Mockito.when; @RunWith(Parameterized.class) -public class UnifiedRepairParameterizedTest extends CQLTester +public class AutoRepairParameterizedTest extends CQLTester { private static final String KEYSPACE = "ks"; private static final String TABLE = "tbl"; - private static final String TABLE_DISABLED_UNIFIED_REPAIR = "tbl_disabled_unified_repair"; + private static final String TABLE_DISABLED_AUTO_REPAIR = "tbl_disabled_auto_repair"; private static final String MV = "mv"; private static TableMetadata cfm; - private static TableMetadata cfmDisabledUnifiedRepair; + private static TableMetadata cfmDisabledAutoRepair; private static Keyspace keyspace; private static int timeFuncCalls; @Mock ScheduledExecutorPlus mockExecutor; @Mock - UnifiedRepairState unifiedRepairState; + AutoRepairState autoRepairState; @Mock RepairCoordinator repairRunnable; - private static UnifiedRepairConfig defaultConfig; + private static AutoRepairConfig defaultConfig; @Parameterized.Parameter() - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + public static Collection repairTypes() { - return Arrays.asList(UnifiedRepairConfig.RepairType.values()); + return Arrays.asList(AutoRepairConfig.RepairType.values()); } @BeforeClass public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); - setUnifiedRepairEnabled(true); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + setAutoRepairEnabled(true); requireNetwork(); - UnifiedRepairUtils.setup(); - StorageService.instance.doUnifiedRepairSetup(); + AutoRepairUtils.setup(); + 
StorageService.instance.doAutoRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); } @@ -128,7 +128,7 @@ public void setup() SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); - QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_UNIFIED_REPAIR)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); @@ -143,23 +143,23 @@ public void setup() Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).truncateBlocking(); Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).disableAutoCompaction(); - Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY).truncateBlocking(); - Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); - UnifiedRepair.instance = new 
UnifiedRepair(); + AutoRepair.instance = new AutoRepair(); executeCQL(); timeFuncCalls = 0; - UnifiedRepair.timeFunc = System::currentTimeMillis; + AutoRepair.timeFunc = System::currentTimeMillis; resetCounters(); resetConfig(); - UnifiedRepair.shuffleFunc = java.util.Collections::shuffle; + AutoRepair.shuffleFunc = java.util.Collections::shuffle; keyspace = Keyspace.open(KEYSPACE); cfm = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).metadata(); - cfmDisabledUnifiedRepair = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE_DISABLED_UNIFIED_REPAIR).metadata(); + cfmDisabledAutoRepair = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE_DISABLED_AUTO_REPAIR).metadata(); DatabaseDescriptor.setCDCOnRepairEnabled(false); } @@ -171,7 +171,7 @@ public void tearDown() private void resetCounters() { - UnifiedRepairMetrics metrics = UnifiedRepairMetricsManager.getMetrics(repairType); + AutoRepairMetrics metrics = AutoRepairMetricsManager.getMetrics(repairType); Metrics.removeMatching((name, metric) -> name.startsWith("repairTurn")); metrics.repairTurnMyTurn = Metrics.counter(String.format("repairTurnMyTurn-%s", repairType)); metrics.repairTurnMyTurnForceRepair = Metrics.counter(String.format("repairTurnMyTurnForceRepair-%s", repairType)); @@ -181,15 +181,15 @@ private void resetCounters() private void resetConfig() { // prepare a fresh default config - defaultConfig = new UnifiedRepairConfig(true); - for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) + defaultConfig = new AutoRepairConfig(true); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - defaultConfig.setUnifiedRepairEnabled(repairType, true); + defaultConfig.setAutoRepairEnabled(repairType, true); defaultConfig.setMVRepairEnabled(repairType, false); } - // reset the UnifiedRepairService config to default - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + // reset the AutoRepairService config to 
default + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.repair_type_overrides = defaultConfig.repair_type_overrides; config.global_settings = defaultConfig.global_settings; config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; @@ -201,24 +201,24 @@ private void executeCQL() QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, s) VALUES ('k', 's')"); QueryProcessor.executeInternal("SELECT s FROM ks.tbl WHERE k='k'"); Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) - .getColumnFamilyStore(SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY) + .getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY) .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); } @Test(expected = ConfigurationException.class) public void testRepairAsyncWithRepairTypeDisabled() { - UnifiedRepairService.instance.getUnifiedRepairConfig().setUnifiedRepairEnabled(repairType, false); + AutoRepairService.instance.getAutoRepairConfig().setAutoRepairEnabled(repairType, false); - UnifiedRepair.instance.repairAsync(repairType); + AutoRepair.instance.repairAsync(repairType); } @Test public void testRepairAsync() { - UnifiedRepair.instance.repairExecutors.put(repairType, mockExecutor); + AutoRepair.instance.repairExecutors.put(repairType, mockExecutor); - UnifiedRepair.instance.repairAsync(repairType); + AutoRepair.instance.repairAsync(repairType); verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); @@ -228,17 +228,17 @@ public void testRepairAsync() public void testRepairTurn() { UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", UnifiedRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); } @Test public void testRepair() { - 
UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repair(repairType); - assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - long lastRepairTime = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + long lastRepairTime = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); //if repair was done then lastRepairTime should be non-zero Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual value lastRepairTime %d", lastRepairTime), lastRepairTime > 0); @@ -247,113 +247,113 @@ public void testRepair() @Test public void testTooFrequentRepairs() { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); //in the first round let repair run config.setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repair(repairType); - long lastRepairTime1 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); - int consideredTables = UnifiedRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + AutoRepair.instance.repair(repairType); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); 
Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), consideredTables, 0); //if repair was done in last 24 hours then it should not trigger another repair config.setRepairMinInterval(repairType, "24h"); - UnifiedRepair.instance.repair(repairType); - long lastRepairTime2 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + AutoRepair.instance.repair(repairType); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertEquals(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); - assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); } @Test public void testNonFrequentRepairs() { - Integer prevMetricsCount = UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); - UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); - UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repair(repairType); - long lastRepairTime1 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + 
AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", lastRepairTime1), lastRepairTime1 > 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); Assert.assertTrue("Expected my turn for the repair", - UnifiedRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); - UnifiedRepair.instance.repair(repairType); - long lastRepairTime2 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + AutoRepair.instance.repair(repairType); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + "lastRepairTime2 ", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); - assertEquals(prevMetricsCount, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @Test public void testGetPriorityHosts() { - Integer prevMetricsCount = UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); - UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); - 
UnifiedRepairService.instance.getUnifiedRepairConfig().setRepairMinInterval(repairType, "0s"); + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); Assert.assertSame(String.format("Priority host count is not same, actual value %d, expected value %d", - UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0), UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0); + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", UnifiedRepairUtils.myTurnToRunRepair(repairType, myId) != + Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); - UnifiedRepair.instance.repair(repairType); - UnifiedRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); + AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); + AutoRepair.instance.repair(repairType); Assert.assertSame(String.format("Priority host count is not same actual value %d, expected value %d", - UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0), UnifiedRepairUtils.getPriorityHosts(repairType).size(), 0); + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); - assertEquals(prevMetricsCount, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @Test - public void testCheckUnifiedRepairStartStop() throws Throwable + 
public void testCheckAutoRepairStartStop() throws Throwable { - Integer prevMetricsCount = UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); - UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); long prevCount = state.getTotalMVTablesConsideredForRepair(); config.setRepairMinInterval(repairType, "0s"); - config.setUnifiedRepairEnabled(repairType, false); - long lastRepairTime1 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); - UnifiedRepair.instance.repair(repairType); - long lastRepairTime2 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + config.setAutoRepairEnabled(repairType, false); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + AutoRepair.instance.repair(repairType); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); //Since repair has not happened, both the last repair times should be same Assert.assertEquals(String.format("Expected lastRepairTime1 %d, and lastRepairTime2 %d to be same", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); - config.setUnifiedRepairEnabled(repairType, true); - UnifiedRepair.instance.repair(repairType); + config.setAutoRepairEnabled(repairType, true); + AutoRepair.instance.repair(repairType); //since repair is done now, so lastRepairTime1/lastRepairTime2 and lastRepairTime3 should not be same - long lastRepairTime3 = UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime(); + long lastRepairTime3 = 
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected lastRepairTime1 %d, and lastRepairTime3 %d to be not same", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime3); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); - assertEquals(prevMetricsCount, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @Test public void testRepairPrimaryRangesByDefault() { Assert.assertTrue("Expected primary range repair only", - UnifiedRepairService.instance.getUnifiedRepairConfig().getRepairPrimaryTokenRangeOnly(repairType)); + AutoRepairService.instance.getAutoRepairConfig().getRepairPrimaryTokenRangeOnly(repairType)); } @Test public void testGetAllMVs() { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, false); assertFalse(config.getMVRepairEnabled(repairType)); - assertEquals(0, UnifiedRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); + assertEquals(0, AutoRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); config.setMVRepairEnabled(repairType, true); assertTrue(config.getMVRepairEnabled(repairType)); - assertEquals(Arrays.asList(MV), UnifiedRepairUtils.getAllMVs(repairType, keyspace, cfm)); + assertEquals(Arrays.asList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); config.setMVRepairEnabled(repairType, false); } @@ -361,32 +361,32 @@ public void testGetAllMVs() @Test public void testMVRepair() { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, true); 
config.setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - UnifiedRepair.instance.repair(repairType); - assertEquals(1, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + AutoRepair.instance.repair(repairType); + assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, false); - UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - UnifiedRepair.instance.repair(repairType); - assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + AutoRepair.instance.repair(repairType); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); config.setMVRepairEnabled(repairType, true); - UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); - UnifiedRepair.instance.repair(repairType); - assertEquals(1, UnifiedRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); - assertEquals(1, 
UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + AutoRepair.instance.repair(repairType); + assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); } @Test public void testSkipRepairSSTableCountHigherThreshold() { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); - UnifiedRepairState state = UnifiedRepair.instance.repairStates.get(repairType); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); ColumnFamilyStore cfsBaseTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE); ColumnFamilyStore cfsMVTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV); Set preBaseTable = cfsBaseTable.getLiveSSTables(); @@ -414,89 +414,89 @@ public void testSkipRepairSSTableCountHigherThreshold() config.setMVRepairEnabled(repairType, true); config.setRepairSSTableCountHigherThreshold(repairType, 9); assertEquals(0, state.getSkippedTokenRangesCount()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); state.setLastRepairTime(0); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); assertEquals(0, state.getTotalMVTablesConsideredForRepair()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); // skipping both the 
tables - one table is due to its repair has been disabled, and another one due to high sstable count assertEquals(0, state.getSkippedTokenRangesCount()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); assertEquals(2, state.getSkippedTablesCount()); - assertEquals(2, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); // set it to higher value, and this time, the tables should not be skipped config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); state.setLastRepairTime(0); state.setSkippedTablesCount(0); state.setTotalMVTablesConsideredForRepair(0); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); assertEquals(1, state.getTotalMVTablesConsideredForRepair()); - assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); assertEquals(0, state.getSkippedTokenRangesCount()); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); assertEquals(1, state.getSkippedTablesCount()); - assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); } @Test public void testGetRepairState() { - assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getRepairKeyspaceCount()); + 
assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getRepairKeyspaceCount()); - UnifiedRepairState state = UnifiedRepair.instance.getRepairState(repairType); + AutoRepairState state = AutoRepair.instance.getRepairState(repairType); state.setRepairKeyspaceCount(100); - assertEquals(100L, UnifiedRepair.instance.getRepairState(repairType).getRepairKeyspaceCount()); + assertEquals(100L, AutoRepair.instance.getRepairState(repairType).getRepairKeyspaceCount()); } @Test public void testMetrics() { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); - config.setUnifiedRepairTableMaxRepairTime(repairType, "0s"); - UnifiedRepair.timeFunc = () -> { + config.setAutoRepairTableMaxRepairTime(repairType, "0s"); + AutoRepair.timeFunc = () -> { timeFuncCalls++; return timeFuncCalls * 1000L; }; - UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); - assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); - assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); - assertEquals(1, UnifiedRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); - assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue() > 0); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); + assertEquals(1, 
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue() > 0); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); - config.setUnifiedRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE - 1) + 's'); - UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); - when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + config.setAutoRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE-1) + 's'); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); - when(unifiedRepairState.getFailedTokenRangesCount()).thenReturn(10); - when(unifiedRepairState.getSucceededTokenRangesCount()).thenReturn(11); - when(unifiedRepairState.getLongestUnrepairedSec()).thenReturn(10); + when(autoRepairState.getFailedTokenRangesCount()).thenReturn(10); + when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); + when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); - UnifiedRepair.instance.repair(repairType); - assertEquals(0, UnifiedRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); - assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); - assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); - 
assertTrue(UnifiedRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); + AutoRepair.instance.repair(repairType); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); } @Test public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws Exception { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, false); config.setRepairRetryBackoff("0s"); - when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); - UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); - when(unifiedRepairState.getLastRepairTime()).thenReturn((long) 0); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + when(autoRepairState.getLastRepairTime()).thenReturn((long) 0); AtomicInteger resetWaitConditionCalls = new AtomicInteger(); AtomicInteger waitForRepairCompletedCalls = new AtomicInteger(); doAnswer(invocation -> { @@ -504,36 +504,36 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws assertEquals("waitForRepairToComplete was called before resetWaitCondition", resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get() + 1); return null; - }).when(unifiedRepairState).resetWaitCondition(); + }).when(autoRepairState).resetWaitCondition(); doAnswer(invocation -> { 
waitForRepairCompletedCalls.getAndIncrement(); assertEquals("resetWaitCondition was not called before waitForRepairToComplete", resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get()); return null; - }).when(unifiedRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + }).when(autoRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); - UnifiedRepair.instance.repair(repairType); - UnifiedRepair.instance.repair(repairType); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); } @Test - public void testDisabledUnifiedRepairForATableThroughTableLevelConfiguration() + public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() { - Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); - Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); - Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); - Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); - int 
disabledTablesRepairCountBefore = UnifiedRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); - UnifiedRepair.instance.repair(repairType); - int consideredTables = UnifiedRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + int disabledTablesRepairCountBefore = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + AutoRepair.instance.repair(repairType); + int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), consideredTables, 0); - int disabledTablesRepairCountAfter = UnifiedRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); - Assert.assertTrue(String.format("A table %s should be skipped from unified repair, expected value: %d, actual value %d ", TABLE_DISABLED_UNIFIED_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), + int disabledTablesRepairCountAfter = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + Assert.assertTrue(String.format("A table %s should be skipped from auto repair, expected value: %d, actual value %d ", TABLE_DISABLED_AUTO_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); } @@ -544,7 +544,7 @@ public void testTokenRangesNoSplit() assertEquals(1, tokens.size()); List> expectedToken = new ArrayList<>(tokens); - List assignments = new DefaultUnifiedRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, Collections.singletonList(TABLE)); + List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, Collections.singletonList(TABLE)); assertEquals(1, assignments.size()); assertEquals(expectedToken.get(0).left, 
assignments.get(0).getTokenRange().left); assertEquals(expectedToken.get(0).right, assignments.get(0).getTokenRange().right); @@ -558,12 +558,12 @@ public void testTableAttribute() } @Test - public void testDefaultUnifiedRepair() + public void testDefaultAutomatedRepair() { - Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); - Assert.assertTrue(cfm.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); - Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.full).repairEnabled()); - Assert.assertFalse(cfmDisabledUnifiedRepair.params.unifiedRepair.get(UnifiedRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); } @Test @@ -571,7 +571,7 @@ public void testRepairShufflesKeyspacesAndTables() { AtomicInteger shuffleKeyspacesCall = new AtomicInteger(); AtomicInteger shuffleTablesCall = new AtomicInteger(); - UnifiedRepair.shuffleFunc = (List list) -> { + AutoRepair.shuffleFunc = (List list) -> { if (!list.isEmpty()) { assertTrue(list.get(0) instanceof Keyspace || list.get(0) instanceof String); @@ -587,9 +587,9 @@ else if (list.get(0) instanceof String) } }; - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); assertEquals(1, 
shuffleKeyspacesCall.get()); assertEquals(5, shuffleTablesCall.get()); @@ -598,88 +598,88 @@ else if (list.get(0) instanceof String) @Test public void testRepairTakesLastRepairTimeFromDB() { - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, true); long lastRepairTime = System.currentTimeMillis() - 1000; - UnifiedRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); - UnifiedRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + AutoRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); config.setRepairMinInterval(repairType, "1h"); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); // repair scheduler should not attempt to run repair as last repair time in DB is current time - 1s - assertEquals(0, UnifiedRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); // repair scheduler should load the repair time from the DB - assertEquals(lastRepairTime, UnifiedRepair.instance.repairStates.get(repairType).getLastRepairTime()); + assertEquals(lastRepairTime, AutoRepair.instance.repairStates.get(repairType).getLastRepairTime()); } @Test public void testRepairMaxRetries() { - when(unifiedRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); - when(unifiedRepairState.isSuccess()).thenReturn(false); - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.isSuccess()).thenReturn(false); + AutoRepairConfig 
config = AutoRepairService.instance.getAutoRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); - UnifiedRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); }; config.setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); //system_auth.role_permissions,system_auth.network_permissions,system_auth.role_members,system_auth.roles, // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, - // system_distributed.unified_repair_priority,system_distributed.repair_history,system_distributed.unified_repair_history, + // system_distributed.auto_repair_priority,system_distributed.repair_history,system_distributed.auto_repair_history, // system_distributed.view_build_status,system_distributed.parent_repair_history,system_distributed.partition_denylist int exptedTablesGoingThroughRepair = 18; assertEquals(config.getRepairMaxRetries()*exptedTablesGoingThroughRepair, sleepCalls.get()); - verify(unifiedRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); - verify(unifiedRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(unifiedRepairState, Mockito.times(1)).setFailedTokenRangesCount(exptedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(exptedTablesGoingThroughRepair); } @Test public void testRepairSuccessAfterRetry() { - when(unifiedRepairState.getRepairRunnable(Mockito.any(), 
Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); - UnifiedRepairConfig config = UnifiedRepairService.instance.getUnifiedRepairConfig(); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); - UnifiedRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); }; - when(unifiedRepairState.isSuccess()).then((invocationOnMock) -> { + when(autoRepairState.isSuccess()).then((invocationOnMock) -> { if (sleepCalls.get() == 0) { return false; } return true; }); config.setRepairMinInterval(repairType, "0s"); - UnifiedRepair.instance.repairStates.put(repairType, unifiedRepairState); - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(unifiedRepairState, Mockito.times(1)).setSucceededTokenRangesCount(18); - verify(unifiedRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(unifiedRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(18); + verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } @Test public void testRepairThrowsForIRWithMVReplay() { - UnifiedRepair.instance.setup(); + AutoRepair.instance.setup(); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - if (repairType == UnifiedRepairConfig.RepairType.incremental) + if (repairType == AutoRepairConfig.RepairType.incremental) { try { - 
UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); fail("Expected ConfigurationException"); } catch (ConfigurationException ignored) @@ -688,7 +688,7 @@ public void testRepairThrowsForIRWithMVReplay() } else { - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); } } @@ -696,14 +696,14 @@ public void testRepairThrowsForIRWithMVReplay() @Test public void testRepairThrowsForIRWithCDCReplay() { - UnifiedRepair.instance.setup(); + AutoRepair.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(true); - if (repairType == UnifiedRepairConfig.RepairType.incremental) + if (repairType == AutoRepairConfig.RepairType.incremental) { try { - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); fail("Expected ConfigurationException"); } catch (ConfigurationException ignored) @@ -712,7 +712,7 @@ public void testRepairThrowsForIRWithCDCReplay() } else { - UnifiedRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); } } } diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java similarity index 76% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateFactoryTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index c1ee56e1065d..a0e5bdc45294 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -16,24 +16,24 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import org.junit.Test; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -public class UnifiedRepairStateFactoryTest +public class AutoRepairStateFactoryTest { @Test public void testGetRepairState() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(RepairType.full); + AutoRepairState state = RepairType.getAutoRepairState(RepairType.full); assert state instanceof FullRepairState; - state = RepairType.getUnifiedRepairState(RepairType.incremental); + state = RepairType.getAutoRepairState(RepairType.incremental); assert state instanceof IncrementalRepairState; } @@ -42,7 +42,7 @@ public void testGetRepairState() { public void testGetRepairStateSupportsAllRepairTypes() { for (RepairType repairType : RepairType.values()) { try { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); assertNotNull(state); } catch (IllegalArgumentException e) { assertNull(e); diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java similarity index 72% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index df60ffc4e484..f0974dd83c1a 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.Arrays; import java.util.Collection; @@ -33,9 +33,9 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.UnifiedRepairHistory; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.progress.ProgressEvent; import org.apache.cassandra.utils.progress.ProgressEventType; @@ -50,7 +50,7 @@ import static org.mockito.MockitoAnnotations.initMocks; @RunWith(Parameterized.class) -public class UnifiedRepairStateTest extends CQLTester +public class AutoRepairStateTest extends CQLTester { private static final String testTable = "test"; @@ -68,15 +68,15 @@ public static Collection repairTypes() @Before public void setUp() { - UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); initMocks(this); createTable(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int)", KEYSPACE, testTable)); } @Test public void testGetRepairRunnable() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); - UnifiedRepairService.setup(); + AutoRepairState state = RepairType.getAutoRepairState(repairType); + AutoRepairService.setup(); Runnable runnable = state.getRepairRunnable(KEYSPACE, ImmutableList.of(testTable), ImmutableSet.of(), false); @@ -86,7 +86,7 @@ public void testGetRepairRunnable() { @Test public void 
testProgressError() throws InterruptedException { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); state.progress("test", progressEvent); @@ -98,7 +98,7 @@ public void testProgressError() throws InterruptedException @Test public void testProgress_progress() throws InterruptedException { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.PROGRESS); state.progress("test", progressEvent); @@ -111,7 +111,7 @@ public void testProgress_progress() throws InterruptedException @Test public void testProgress_complete() throws InterruptedException { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.COMPLETE); state.progress("test", progressEvent); @@ -123,7 +123,7 @@ public void testProgress_complete() throws InterruptedException @Test public void testWaitForRepairToComplete() throws Exception { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.condition.signalAll(); Condition finishedCondition = Condition.newOneTimeCondition(); Callable waitForRepairToComplete = () -> { @@ -139,7 +139,7 @@ public void testWaitForRepairToComplete() throws Exception @Test public void testGetLastRepairTime() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.lastRepairTimeInMs = 1; assertEquals(1, state.getLastRepairTime()); @@ -147,7 +147,7 @@ public void testGetLastRepairTime() { @Test public void testSetTotalTablesConsideredForRepair() 
{ - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setTotalTablesConsideredForRepair(1); @@ -156,7 +156,7 @@ public void testSetTotalTablesConsideredForRepair() { @Test public void testGetTotalTablesConsideredForRepair() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.totalTablesConsideredForRepair = 1; assertEquals(1, state.getTotalTablesConsideredForRepair()); @@ -164,7 +164,7 @@ public void testGetTotalTablesConsideredForRepair() { @Test public void testSetLastRepairTimeInMs() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setLastRepairTime(1); @@ -173,7 +173,7 @@ public void testSetLastRepairTimeInMs() { @Test public void testGetClusterRepairTimeInSec() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.clusterRepairTimeInSec = 1; assertEquals(1, state.getClusterRepairTimeInSec()); @@ -181,7 +181,7 @@ public void testGetClusterRepairTimeInSec() { @Test public void testGetNodeRepairTimeInSec() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.nodeRepairTimeInSec = 1; assertEquals(1, state.getNodeRepairTimeInSec()); @@ -189,7 +189,7 @@ public void testGetNodeRepairTimeInSec() { @Test public void testSetRepairInProgress() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setRepairInProgress(true); @@ -198,7 +198,7 @@ public void testSetRepairInProgress() { @Test public void testIsRepairInProgress() { - UnifiedRepairState state = 
RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.repairInProgress = true; assertTrue(state.isRepairInProgress()); @@ -206,7 +206,7 @@ public void testIsRepairInProgress() { @Test public void testSetSkippedTokenRangesCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setSkippedTokenRangesCount(1); @@ -215,7 +215,7 @@ public void testSetSkippedTokenRangesCount() { @Test public void testGetSkippedTokenRangesCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.skippedTokenRangesCount = 1; assertEquals(1, state.getSkippedTokenRangesCount()); @@ -223,7 +223,7 @@ public void testGetSkippedTokenRangesCount() { @Test public void testGetLongestUnrepairedSecNull() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.longestUnrepairedNode = null; try @@ -236,10 +236,10 @@ public void testGetLongestUnrepairedSecNull() { @Test public void testGetLongestUnrepairedSec() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); - state.longestUnrepairedNode = new UnifiedRepairHistory(UUID.randomUUID(), "", 0, 1000, - null, 0, false); - UnifiedRepairState.timeFunc = () -> 2000L; + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.longestUnrepairedNode = new AutoRepairHistory(UUID.randomUUID(), "", 0, 1000, + null, 0, false); + AutoRepairState.timeFunc = () -> 2000L; try { @@ -251,7 +251,7 @@ public void testGetLongestUnrepairedSec() { @Test public void testSetTotalMVTablesConsideredForRepair() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); 
state.setTotalMVTablesConsideredForRepair(1); @@ -260,7 +260,7 @@ public void testSetTotalMVTablesConsideredForRepair() { @Test public void testGetTotalMVTablesConsideredForRepair() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.totalMVTablesConsideredForRepair = 1; assertEquals(1, state.getTotalMVTablesConsideredForRepair()); @@ -268,7 +268,7 @@ public void testGetTotalMVTablesConsideredForRepair() { @Test public void testSetNodeRepairTimeInSec() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setNodeRepairTimeInSec(1); @@ -277,7 +277,7 @@ public void testSetNodeRepairTimeInSec() { @Test public void testSetClusterRepairTimeInSec() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setClusterRepairTimeInSec(1); @@ -286,7 +286,7 @@ public void testSetClusterRepairTimeInSec() { @Test public void testSetRepairKeyspaceCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setRepairKeyspaceCount(1); @@ -294,7 +294,7 @@ public void testSetRepairKeyspaceCount() { } @Test public void testGetRepairKeyspaceCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.repairKeyspaceCount = 1; assertEquals(1, state.getRepairKeyspaceCount()); @@ -302,8 +302,8 @@ public void testGetRepairKeyspaceCount() { @Test public void testSetLongestUnrepairedNode() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); - UnifiedRepairHistory history = new UnifiedRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + AutoRepairState state = 
RepairType.getAutoRepairState(repairType); + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); state.setLongestUnrepairedNode(history); @@ -312,7 +312,7 @@ public void testSetLongestUnrepairedNode() { @Test public void testSetSucceededTokenRangesCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setSucceededTokenRangesCount(1); @@ -321,7 +321,7 @@ public void testSetSucceededTokenRangesCount() { @Test public void testGetSucceededTokenRangesCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.succeededTokenRangesCount = 1; assertEquals(1, state.getSucceededTokenRangesCount()); @@ -329,7 +329,7 @@ public void testGetSucceededTokenRangesCount() { @Test public void testSetFailedTokenRangesCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setFailedTokenRangesCount(1); @@ -338,7 +338,7 @@ public void testSetFailedTokenRangesCount() { @Test public void testGetFailedTokenRangesCount() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.failedTokenRangesCount = 1; assertEquals(1, state.getFailedTokenRangesCount()); @@ -346,7 +346,7 @@ public void testGetFailedTokenRangesCount() { @Test public void isSuccess() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.success = true; assertTrue(state.isSuccess()); @@ -359,7 +359,7 @@ public void isSuccess() { @Test public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesError() throws InterruptedException { - UnifiedRepairState state = 
RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); state.progress("test", progressEvent); @@ -372,7 +372,7 @@ public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesErro @Test public void testResetWaitCondition() { - UnifiedRepairState state = RepairType.getUnifiedRepairState(repairType); + AutoRepairState state = RepairType.getAutoRepairState(repairType); state.condition.signalAll(); assertTrue(state.condition.isSignalled()); diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java similarity index 79% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 4e6e755a09f4..df9f105b615f 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.HashMap; import java.util.Map; @@ -34,38 +34,38 @@ import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.ReplicationParams; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.schema.SchemaTestUtil; -import org.apache.cassandra.service.UnifiedRepairService; +import org.apache.cassandra.service.AutoRepairService; -import static org.apache.cassandra.Util.setUnifiedRepairEnabled; +import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class UnifiedRepairTest extends CQLTester +public class AutoRepairTest extends CQLTester { @BeforeClass public static void setupClass() throws Exception { - setUnifiedRepairEnabled(true); + setAutoRepairEnabled(true); requireNetwork(); } @Before public void setup() { - UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.full, true); - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.incremental, true); - UnifiedRepairService.setup(); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + AutoRepairService.setup(); } @Test public void testSetup() { - UnifiedRepair instance = new UnifiedRepair(); + AutoRepair instance = new 
AutoRepair(); instance.setup(); assertEquals(RepairType.values().length, instance.repairExecutors.size()); @@ -81,7 +81,7 @@ public void testSetup() @Test public void testSafeGuardSetupCall() { - UnifiedRepair instance = new UnifiedRepair(); + AutoRepair instance = new AutoRepair(); // only one should be setup, and rest should be ignored instance.setup(); @@ -101,22 +101,22 @@ public void testSafeGuardSetupCall() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithCDCReplay() { - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); - UnifiedRepair instance = new UnifiedRepair(); + AutoRepair instance = new AutoRepair(); instance.setup(); } @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithMVReplay() { - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - UnifiedRepair instance = new UnifiedRepair(); + AutoRepair instance = new AutoRepair(); instance.setup(); } @@ -141,14 +141,14 @@ public void testCheckNTSreplicationNodeInsideOutsideDC() // case 1 : // node reside in "datacenter1" // keyspace has replica in "datacenter1" - Assert.assertTrue(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + Assert.assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); } else if (ks.getName().equals(ksname2)) { // case 2 : // node reside in "datacenter1" // keyspace has replica in "datacenter2" - 
Assert.assertFalse(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + Assert.assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); } } } diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java similarity index 70% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtilsTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 28234f640223..3629c03041ab 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/UnifiedRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.util.List; import java.util.Set; @@ -37,9 +37,9 @@ import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Locator; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig.RepairType; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.UnifiedRepairHistory; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.CurrentRepairStatus; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.CurrentRepairStatus; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; @@ -51,14 +51,14 @@ import org.mockito.Mock; import org.mockito.MockitoAnnotations; -import static org.apache.cassandra.Util.setUnifiedRepairEnabled; +import static org.apache.cassandra.Util.setAutoRepairEnabled; import static 
org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_DELETE_HOSTS; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_FORCE_REPAIR; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_FINISH_TS; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_PRIORITY; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_START_TS; -import static org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils.COL_REPAIR_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_DELETE_HOSTS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_FORCE_REPAIR; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_FINISH_TS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_START_TS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_TURN; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -66,7 +66,7 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.when; -public class UnifiedRepairUtilsTest extends CQLTester +public class AutoRepairUtilsTest extends CQLTester { static RepairType repairType = RepairType.incremental; static UUID hostId; @@ -82,12 +82,12 @@ public class UnifiedRepairUtilsTest extends CQLTester public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - setUnifiedRepairEnabled(true); + setAutoRepairEnabled(true); requireNetwork(); defaultSnitch = DatabaseDescriptor.getLocator(); localEndpoint = FBUtilities.getBroadcastAddressAndPort(); hostId = 
StorageService.instance.getHostIdForEndpoint(localEndpoint); - StorageService.instance.doUnifiedRepairSetup(); + StorageService.instance.doAutoRepairSetup(); } @Before @@ -97,14 +97,14 @@ public void setup() QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", "ks")); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", "ks", "tbl")); - UnifiedRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY)); + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } @Test @@ -112,14 +112,14 @@ public void testSetForceRepair() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); - UnifiedRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); + AutoRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -129,11 +129,11 @@ public void testSetForceRepair() @Test public void testSetForceRepairNewNode() { - UnifiedRepairUtils.setForceRepairNewNode(repairType); + AutoRepairUtils.setForceRepairNewNode(repairType); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -146,14 +146,14 @@ public void testClearDeleteHosts() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, delete_hosts, delete_hosts_update_time) VALUES ('%s', %s, { %s }, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId, hostId)); - UnifiedRepairUtils.clearDeleteHosts(repairType, hostId); + AutoRepairUtils.clearDeleteHosts(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT delete_hosts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -162,23 +162,23 @@ public void testClearDeleteHosts() } @Test - public void testGetUnifiedRepairHistoryForLocalGroup() + public 
void testGetAutoRepairHistoryForLocalGroup() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); - List history = UnifiedRepairUtils.getUnifiedRepairHistory(repairType); + List history = AutoRepairUtils.getAutoRepairHistory(repairType); assertNotNull(history); assertEquals(1, history.size()); assertEquals(hostId, history.get(0).hostId); } @Test - public void testGetUnifiedRepairHistoryForLocalGroup_empty_history() + public void testGetAutoRepairHistoryForLocalGroup_empty_history() { - List history = UnifiedRepairUtils.getUnifiedRepairHistory(repairType); + List history = AutoRepairUtils.getAutoRepairHistory(repairType); assertNull(history); } @@ -190,22 +190,22 @@ public void testGetCurrentRepairStatus() UUID regularRepair = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, force_repair, repair_start_ts) VALUES ('%s', %s, true, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), forceRepair)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts) VALUES ('%s', %s, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), regularRepair)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), regularRepair)); - CurrentRepairStatus status = UnifiedRepairUtils.getCurrentRepairStatus(repairType); + CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType); assertNotNull(status); assertEquals(1, status.historiesWithoutOnGoingRepair.size()); @@ -221,7 +221,7 @@ public void testGetCurrentRepairStatus() @Test public void testGetHostIdsInCurrentRing() { - TreeSet hosts = UnifiedRepairUtils.getHostIdsInCurrentRing(repairType); + TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType); assertNotNull(hosts); assertEquals(1, hosts.size()); @@ -233,13 +233,13 @@ public void testGetHostIdsInCurrentRing_multiple_nodes() { InetAddressAndPort ignoredEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); InetAddressAndPort deadEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 2); - DatabaseDescriptor.getUnifiedRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); + DatabaseDescriptor.getAutoRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); //DatabaseDescriptor.setEndpointSnitch(snitchMock); when(snitchMock.location(localEndpoint)).thenReturn(new Location("dc1", "rac1")); when(snitchMock.location(ignoredEndpoint)).thenReturn(new Location("dc2", "rac1")); when(snitchMock.location(deadEndpoint)).thenReturn(new Location("dc1", "rac1")); - TreeSet hosts = UnifiedRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), 
new NodeAddresses(deadEndpoint))); + TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new NodeAddresses(deadEndpoint))); assertNotNull(hosts); assertEquals(1, hosts.size()); @@ -252,93 +252,93 @@ public void testGetHostWithLongestUnrepairTime() UUID otherHostId = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id, repair_finish_ts) VALUES ('%s', %s, toTimestamp(now()))", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), otherHostId)); - UnifiedRepairHistory history = UnifiedRepairUtils.getHostWithLongestUnrepairTime(repairType); + AutoRepairHistory history = AutoRepairUtils.getHostWithLongestUnrepairTime(repairType); assertEquals(hostId, history.hostId); } @Test - public void testGetMaxNumberOfNodeRunUnifiedRepairInGroup_0_group_size() + public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() { - DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairCount(repairType, 2); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); - int count = UnifiedRepairUtils.getMaxNumberOfNodeRunUnifiedRepair(repairType, 0); + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 0); assertEquals(2, count); } @Test - public void testGetMaxNumberOfNodeRunUnifiedRepairInGroup_percentage() + public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() { - 
DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairCount(repairType, 2); - DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairPercentage(repairType, 50); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairPercentage(repairType, 50); - int count = UnifiedRepairUtils.getMaxNumberOfNodeRunUnifiedRepair(repairType, 10); + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 10); assertEquals(5, count); } @Test - public void testDeleteUnifiedRepairHistory() + public void testDeleteAutoRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); - UnifiedRepairUtils.deleteUnifiedRepairHistory(repairType, hostId); + AutoRepairUtils.deleteAutoRepairHistory(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(0, result.size()); } @Test - public void testUpdateStartUnifiedRepairHistory() + public void testUpdateStartAutoRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); - 
UnifiedRepairUtils.updateStartUnifiedRepairHistory(repairType, hostId, 123, UnifiedRepairUtils.RepairTurn.MY_TURN); + AutoRepairUtils.updateStartAutoRepairHistory(repairType, hostId, 123, AutoRepairUtils.RepairTurn.MY_TURN); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT repair_start_ts, repair_turn FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); UntypedResultSet.Row row = result.one(); assertEquals(123, row.getLong(COL_REPAIR_START_TS, 0)); - assertEquals(UnifiedRepairUtils.RepairTurn.MY_TURN.toString(), row.getString(COL_REPAIR_TURN)); + assertEquals(AutoRepairUtils.RepairTurn.MY_TURN.toString(), row.getString(COL_REPAIR_TURN)); } @Test - public void testUpdateFinishUnifiedRepairHistory() + public void testUpdateFinishAutoRepairHistory() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); - UnifiedRepairUtils.updateFinishUnifiedRepairHistory(repairType, hostId, 123); + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, hostId, 123); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT repair_finish_ts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), hostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -351,14 +351,14 @@ 
public void testAddHostIdToDeleteHosts() UUID otherHostId = UUID.randomUUID(); QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), otherHostId)); - UnifiedRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); + AutoRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType.toString(), otherHostId)); assertNotNull(result); assertEquals(1, result.size()); @@ -371,11 +371,11 @@ public void testAddHostIdToDeleteHosts() @Test public void testAddPriorityHost() { - UnifiedRepairUtils.addPriorityHosts(repairType, ImmutableSet.of(localEndpoint)); + AutoRepairUtils.addPriorityHosts(repairType, ImmutableSet.of(localEndpoint)); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString())); assertNotNull(result); assertEquals(1, result.size()); @@ -390,14 +390,14 @@ public void testRemovePriorityStatus() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), hostId)); - UnifiedRepairUtils.removePriorityStatus(repairType, hostId); + AutoRepairUtils.removePriorityStatus(repairType, hostId); UntypedResultSet result = QueryProcessor.executeInternal(String.format( "SELECT * FROM %s.%s WHERE repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString())); assertNotNull(result); assertEquals(1, result.size()); @@ -410,10 +410,10 @@ public void testGetPriorityHosts() { QueryProcessor.executeInternal(String.format( "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_PRIORITY, + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), hostId)); - Set hosts = UnifiedRepairUtils.getPriorityHosts(repairType); + Set hosts = AutoRepairUtils.getPriorityHosts(repairType); assertNotNull(hosts); assertEquals(1, hosts.size()); @@ -425,29 +425,29 @@ public void testCheckNodeContainsKeyspaceReplica() { Keyspace ks = Keyspace.open("ks"); - assertTrue(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); } @Test public void testTableMaxRepairTimeExceeded() { - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairTableMaxRepairTime(repairType, "0s"); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); - assertTrue(UnifiedRepairUtils.tableMaxRepairTimeExceeded(repairType, 0)); + assertTrue(AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, 0)); } @Test public void testKeyspaceMaxRepairTimeExceeded() { - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairTableMaxRepairTime(repairType, "0s"); + 
DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); - assertTrue(UnifiedRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, 0, 1)); + assertTrue(AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, 0, 1)); } @Test public void testGetLastRepairFinishTime() { - UnifiedRepairHistory history = new UnifiedRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); assertEquals(0, history.getLastRepairFinishTime()); @@ -461,21 +461,21 @@ public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() { UUID myID = UUID.randomUUID(); UUID otherID = UUID.randomUUID(); - DatabaseDescriptor.getUnifiedRepairConfig().setParallelRepairCount(repairType, 5); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 5); long currentMillis = System.currentTimeMillis(); // finish time less than start time means that repair is ongoing - UnifiedRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); // finish time is larger than start time means that repair for other node is finished - UnifiedRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); - assertEquals(UnifiedRepairUtils.RepairTurn.MY_TURN, UnifiedRepairUtils.myTurnToRunRepair(repairType, myID)); + assertEquals(AutoRepairUtils.RepairTurn.MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myID)); } @Test public void testLocalStrategyAndNetworkKeyspace() { - assertFalse(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); - assertTrue(UnifiedRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); + 
assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); + assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); } @Test @@ -484,10 +484,10 @@ public void testGetLastRepairTimeForNode() UUID myID = UUID.randomUUID(); UUID otherID = UUID.randomUUID(); long currentMillis = System.currentTimeMillis(); - UnifiedRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); - UnifiedRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); - assertEquals(currentMillis - 100, UnifiedRepairUtils.getLastRepairTimeForNode(repairType, myID)); + assertEquals(currentMillis - 100, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); } @Test @@ -495,6 +495,6 @@ public void testGetLastRepairTimeForNodeWhenHistoryIsEmpty() { UUID myID = UUID.randomUUID(); - assertEquals(0, UnifiedRepairUtils.getLastRepairTimeForNode(repairType, myID)); + assertEquals(0, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); } } diff --git a/test/unit/org/apache/cassandra/repair/unifiedrepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java similarity index 97% rename from test/unit/org/apache/cassandra/repair/unifiedrepair/SSTableRepairedAtTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index 14011f85896d..bd14eea805b3 100644 --- a/test/unit/org/apache/cassandra/repair/unifiedrepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.cassandra.repair.unifiedrepair; +package org.apache.cassandra.repair.autorepair; import java.net.UnknownHostException; import java.util.Arrays; @@ -55,8 +55,8 @@ public class SSTableRepairedAtTest extends CQLTester public static void setUp() throws ConfigurationException, UnknownHostException { requireNetwork(); - UnifiedRepairUtils.setup(); - StorageService.instance.doUnifiedRepairSetup(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java new file mode 100644 index 000000000000..054f136dad75 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.service; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; + +import static org.junit.Assert.assertEquals; + +public class AutoRepairServiceBasicTest extends CQLTester { + private static AutoRepairService autoRepairService; + private static AutoRepairConfig config; + + @Before + public void setUp() { + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + config = new AutoRepairConfig(); + autoRepairService = new AutoRepairService(); + autoRepairService.config = config; + } + + @Test + public void testSetup() { + AutoRepairService.instance.config = null; + + AutoRepairService.setup(); + + assertEquals(DatabaseDescriptor.getAutoRepairConfig(), AutoRepairService.instance.config); + } + + @Test + public void testGetAutoRepairConfigReturnsConfig() { + assertEquals(config, autoRepairService.getAutoRepairConfig()); + } + + @Test + public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { + autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); + + assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); + } + + + @Test + public void testsetAutoRepairMaxRetriesCount() { + autoRepairService.setAutoRepairMaxRetriesCount(101); + + assertEquals(101, config.getRepairMaxRetries()); + } + + + @Test + public void testsetAutoRepairRetryBackoffInSec() { + autoRepairService.setAutoRepairRetryBackoff("102s"); + + assertEquals(102, config.getRepairRetryBackoff().toSeconds()); + } + + @Test(expected = ConfigurationException.class) + public void 
testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { + autoRepairService.config = new AutoRepairConfig(false); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + } +} diff --git a/test/unit/org/apache/cassandra/service/UnifiedRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java similarity index 72% rename from test/unit/org/apache/cassandra/service/UnifiedRepairServiceRepairTypeTest.java rename to test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index d91c89ee8619..7c8645149adc 100644 --- a/test/unit/org/apache/cassandra/service/UnifiedRepairServiceRepairTypeTest.java 
+++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -20,8 +20,8 @@ import com.google.common.collect.ImmutableSet; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -33,44 +33,44 @@ import java.util.Set; import java.util.UUID; -import static org.apache.cassandra.Util.setUnifiedRepairEnabled; +import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) -public class UnifiedRepairServiceRepairTypeTest extends CQLTester { +public class AutoRepairServiceRepairTypeTest extends CQLTester { @Parameterized.Parameter() - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; private final UUID host1 = UUID.fromString("00000000-0000-0000-0000-000000000001"); private final UUID host2 = UUID.fromString("00000000-0000-0000-0000-000000000002"); - private UnifiedRepairService instance; + private AutoRepairService instance; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() { - return Arrays.asList(UnifiedRepairConfig.RepairType.values()); + public static Collection repairTypes() { + return Arrays.asList(AutoRepairConfig.RepairType.values()); } @BeforeClass public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - setUnifiedRepairEnabled(true); + setAutoRepairEnabled(true); requireNetwork(); } @Before public void setUpTest() { - UnifiedRepairUtils.setup(); - instance = new UnifiedRepairService(); + 
AutoRepairUtils.setup(); + instance = new AutoRepairService(); } @Test public void testGetOnGoingRepairHostIdsTest() { long now = System.currentTimeMillis(); - UnifiedRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); - UnifiedRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); + AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); + AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); Set hosts = instance.getOnGoingRepairHostIds(repairType); diff --git a/test/unit/org/apache/cassandra/service/UnifiedRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java similarity index 59% rename from test/unit/org/apache/cassandra/service/UnifiedRepairServiceSetterTest.java rename to test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 57b5500b665e..f34e1f0a7071 100644 --- a/test/unit/org/apache/cassandra/service/UnifiedRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -24,8 +24,8 @@ import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.junit.Before; @@ -43,48 +43,48 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.apache.cassandra.Util.setUnifiedRepairEnabled; +import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) -public class 
UnifiedRepairServiceSetterTest extends CQLTester { - private static final UnifiedRepairConfig config = new UnifiedRepairConfig(true); +public class AutoRepairServiceSetterTest extends CQLTester { + private static final AutoRepairConfig config = new AutoRepairConfig(true); @Parameterized.Parameter - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; @Parameterized.Parameter(1) public T arg; @Parameterized.Parameter(2) - public BiConsumer setter; + public BiConsumer setter; @Parameterized.Parameter(3) - public Function getter; + public Function getter; @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") public static Collection testCases() { DatabaseDescriptor.setConfig(DatabaseDescriptor.loadConfig()); return Stream.of( - forEachRepairType(true, UnifiedRepairService.instance::setUnifiedRepairEnabled, config::isUnifiedRepairEnabled), - forEachRepairType(100, UnifiedRepairService.instance::setRepairThreads, config::getRepairThreads), - forEachRepairType(200, UnifiedRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), - forEachRepairType(400, UnifiedRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), - forEachRepairType(ImmutableSet.of("dc1", "dc2"), UnifiedRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), - forEachRepairType(true, UnifiedRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, UnifiedRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), - forEachRepairType(700, UnifiedRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), - forEachRepairType(true, UnifiedRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), UnifiedRepairService.instance::setRepairPriorityForHosts, 
UnifiedRepairUtils::getPriorityHosts), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), UnifiedRepairService.instance::setForceRepairForHosts, UnifiedRepairServiceSetterTest::isLocalHostForceRepair) + forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), + forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(200, AutoRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), + forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), + forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, AutoRepairServiceSetterTest::isLocalHostForceRepair) ).flatMap(Function.identity()).collect(Collectors.toList()); } - private static Set isLocalHostForceRepair(UnifiedRepairConfig.RepairType type) { + private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) { UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( - "SELECT force_repair 
FROM %s.%s WHERE host_id = %s and repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.UNIFIED_REPAIR_HISTORY, hostId, type)); + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) { return ImmutableSet.of(InetAddressAndPort.getLocalHost()); @@ -92,9 +92,9 @@ private static Set isLocalHostForceRepair(UnifiedRepairConfi return ImmutableSet.of(); } - private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { - Object[][] testCases = new Object[UnifiedRepairConfig.RepairType.values().length][4]; - for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) { + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { testCases[repairType.ordinal()] = new Object[]{repairType, arg, setter, getter}; } @@ -104,22 +104,22 @@ private static Stream forEachRepairType(T arg, BiConsumer repairTypes() + public static Collection repairTypes() { - return Arrays.asList(UnifiedRepairConfig.RepairType.values()); + return Arrays.asList(AutoRepairConfig.RepairType.values()); } @Before @@ -66,13 +66,13 @@ public void setUp() throws Exception cmdOutput = new ByteArrayOutputStream(); PrintStream out = new PrintStream(cmdOutput); when(probe.output()).thenReturn(new Output(out, out)); - cmd = new UnifiedRepairStatus(); + cmd = new AutoRepairStatus(); DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.loadConfig(); - setUnifiedRepairEnabled(true); - DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.full, true); - 
DatabaseDescriptor.getUnifiedRepairConfig().setUnifiedRepairEnabled(UnifiedRepairConfig.RepairType.incremental, true); - when(probe.getUnifiedRepairConfig()).thenReturn(DatabaseDescriptor.getUnifiedRepairConfig()); + setAutoRepairEnabled(true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.full, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); } @Test(expected = IllegalArgumentException.class) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java similarity index 79% rename from test/unit/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfigTest.java rename to test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 2e1dded34b54..6e2d79def026 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetUnifiedRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -35,7 +35,7 @@ import org.junit.runners.Suite; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.unifiedrepair.UnifiedRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.tools.NodeProbe; import org.mockito.Mock; import org.mockito.MockitoAnnotations; @@ -46,19 +46,19 @@ import static org.mockito.Mockito.verify; @RunWith(Suite.class) -@Suite.SuiteClasses({ SetUnifiedRepairConfigTest.NoParamTests.class, SetUnifiedRepairConfigTest.RepairTypeParamTests.class, - SetUnifiedRepairConfigTest.RepairTypeAndArgsParamsTests.class }) -public class SetUnifiedRepairConfigTest +@Suite.SuiteClasses({ SetAutoRepairConfigTest.NoParamTests.class, SetAutoRepairConfigTest.RepairTypeParamTests.class, + 
SetAutoRepairConfigTest.RepairTypeAndArgsParamsTests.class }) +public class SetAutoRepairConfigTest { - protected static UnifiedRepairConfig config; + protected static AutoRepairConfig config; - protected static SetUnifiedRepairConfig cmd; + protected static SetAutoRepairConfig cmd; public static void before(NodeProbe probeMock, PrintStream outMock) { - config = new UnifiedRepairConfig(true); - when(probeMock.getUnifiedRepairConfig()).thenReturn(config); - cmd = new SetUnifiedRepairConfig(); + config = new AutoRepairConfig(true); + when(probeMock.getAutoRepairConfig()).thenReturn(config); + cmd = new SetAutoRepairConfig(); cmd.out = outMock; } @@ -84,15 +84,15 @@ public void testHistoryDeleteHostsClearBufferInSec() cmd.execute(probe); - verify(probe, times(1)).setUnifiedRepairHistoryClearDeleteHostsBufferDuration("1s"); + verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); - // test scenario when unified repair is disabled - when(probe.getUnifiedRepairConfig()).thenReturn(new UnifiedRepairConfig(false)); + // test scenario when auto repair is disabled + when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); cmd.execute(probe); - // test new calls are not made when unified repair is disabled - verify(probe, times(1)).setUnifiedRepairHistoryClearDeleteHostsBufferDuration("1s"); + // test new calls are not made when auto repair is disabled + verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); } @Test @@ -102,7 +102,7 @@ public void testRepairMaxRetries() cmd.execute(probe); - verify(probe, times(1)).setUnifiedRepairMaxRetriesCount(2); + verify(probe, times(1)).setAutoRepairMaxRetriesCount(2); } @@ -113,7 +113,7 @@ public void testRetryBackoffInSec() cmd.execute(probe); - verify(probe, times(1)).setUnifiedRepairRetryBackoff("3s"); + verify(probe, times(1)).setAutoRepairRetryBackoff("3s"); } @Test @@ -143,12 +143,12 @@ public static class RepairTypeParamTests private static PrintStream out; 
@Parameterized.Parameter - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; @Parameterized.Parameters(name = "repairType={0}") public static Object[] data() { - return UnifiedRepairConfig.RepairType.values(); + return AutoRepairConfig.RepairType.values(); } private static InetAddressAndPort localEndpoint; @@ -173,20 +173,20 @@ public void testNoArgs() @Test public void testRepairSchedulingDisabled() { - when(probe.getUnifiedRepairConfig()).thenReturn(new UnifiedRepairConfig(false)); + when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); cmd.repairType = repairType; cmd.args = ImmutableList.of("threads", "1"); cmd.execute(probe); - verify(out, times(1)).println("Unified-repair is not enabled"); + verify(out, times(1)).println("Auto-repair is not enabled"); verify(probe, times(0)).setRepairThreads(repairType, 1); } @Test public void testRepairTypeDisabled() { - config.setUnifiedRepairEnabled(repairType, false); + config.setAutoRepairEnabled(repairType, false); cmd.repairType = repairType; cmd.args = ImmutableList.of("number_of_repair_threads", "1"); @@ -253,7 +253,7 @@ public void testForceRepairHosts() public static class RepairTypeAndArgsParamsTests { @Parameterized.Parameter - public UnifiedRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairType; @Parameterized.Parameter(1) public String paramType; @@ -262,30 +262,30 @@ public static class RepairTypeAndArgsParamsTests public String paramVal; @Parameterized.Parameter(3) - public Consumer verifyFunc; + public Consumer verifyFunc; @Parameterized.Parameters(name = "repairType={0},paramType={1}") public static Collection testCases() { return Stream.of( - forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setUnifiedRepairEnabled(type, true)), + forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type, true)), forEachRepairType("number_of_repair_threads", "1", 
(type) -> verify(probe, times(1)).setRepairThreads(type, 1)), forEachRepairType("number_of_subranges", "2", (type) -> verify(probe, times(1)).setRepairSubRangeNum(type, 2)), forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type, "3h")), forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type, 4)), - forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setUnifiedRepairTableMaxRepairTime(type, "5s")), + forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type, "5s")), forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type, true)), forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type, 6)), forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type, 7)), forEachRepairType("mv_repair_enabled", "true", (type) -> verify(probe, times(1)).setMVRepairEnabled(type, true)), - forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setUnifiedRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) + forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) ).flatMap(Function.identity()).collect(Collectors.toList()); } - private static Stream forEachRepairType(String paramType, String paramVal, Consumer verifyFunc) + private static Stream forEachRepairType(String paramType, String paramVal, Consumer verifyFunc) { - Object[][] testCases = new Object[UnifiedRepairConfig.RepairType.values().length][4]; - for (UnifiedRepairConfig.RepairType repairType : UnifiedRepairConfig.RepairType.values()) + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for 
(AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { testCases[repairType.ordinal()] = new Object[]{ repairType, paramType, paramVal, verifyFunc }; } @@ -316,12 +316,12 @@ public void test() verifyFunc.accept(repairType); - // test scenario when unified repair is disabled - when(probe.getUnifiedRepairConfig()).thenReturn(new UnifiedRepairConfig(false)); + // test scenario when auto repair is disabled + when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); cmd.execute(probe); - // test new calls are not made when unified repair is disabled + // test new calls are not made when auto repair is disabled verifyFunc.accept(repairType); } } From 6e88789fedc152068c2617809f9555a2507a8868 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Thu, 7 Nov 2024 12:30:28 -0800 Subject: [PATCH 058/257] Implement preview repaired metrics --- .../cassandra/metrics/KeyspaceMetrics.java | 7 +++++ .../cassandra/metrics/TableMetrics.java | 6 ++++ .../apache/cassandra/repair/RepairJob.java | 17 ++++++++++- .../repair/autorepair/AutoRepairConfig.java | 5 +++- .../repair/autorepair/AutoRepairState.java | 30 +++++++++++++++++++ .../repair/consistent/SyncStatSummary.java | 14 +++++++-- .../AutoRepairStateFactoryTest.java | 4 +++ 7 files changed, 78 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index 209c5a7a8ede..b2c8f6e68f84 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -145,6 +145,11 @@ public class KeyspaceMetrics public final Counter outOfRangeTokenWrites; /** Lifetime count of paxos requests for keys outside the node's owned token ranges for this keyspace **/ public final Counter outOfRangeTokenPaxosRequests; + /** histogram over the number of desynchronized token ranges detected during preview repair */ + public final Histogram 
previewedDesynchronizedTokenRanges; + /** histogram over the number of desynchronized bytes detected during preview repair */ + public final Histogram previewedDesynchronizedBytes; + /* * Metrics for inconsistencies detected between repaired data sets across replicas. These @@ -277,6 +282,8 @@ public KeyspaceMetrics(final Keyspace ks) repairSyncTime = createKeyspaceTimer("RepairSyncTime"); partitionsValidated = createKeyspaceHistogram("PartitionsValidated", false); bytesValidated = createKeyspaceHistogram("BytesValidated", false); + previewedDesynchronizedTokenRanges = createKeyspaceHistogram("PreviewedDesynchronizedTokenRanges", false); + previewedDesynchronizedBytes = createKeyspaceHistogram("PreviewedDesynchronizedBytes", false); confirmedRepairedInconsistencies = createKeyspaceMeter("RepairedDataInconsistenciesConfirmed"); unconfirmedRepairedInconsistencies = createKeyspaceMeter("RepairedDataInconsistenciesUnconfirmed"); diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 719c8af81164..672eebb1eb24 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -222,6 +222,10 @@ public class TableMetrics public final Counter bytesAnticompacted; /** number of bytes where the whole sstable was contained in a repairing range so that we only mutated the repair status */ public final Counter bytesMutatedAnticompaction; + /** number of desynchronized token ranges that were detected during preview repair */ + public final TableHistogram previewedDesynchronizedTokenRanges; + /** number of desynchronized bytes that were detected during preview repair */ + public final TableHistogram previewedDesynchronizedBytes; /** ratio of how much we anticompact vs how much we could mutate the repair status*/ public final Gauge mutatedAnticompactionGauge; @@ -833,6 +837,8 @@ public Long getValue() partitionsValidated = 
createTableHistogram("PartitionsValidated", cfs.keyspace.metric.partitionsValidated, false); bytesAnticompacted = createTableCounter("BytesAnticompacted"); bytesMutatedAnticompaction = createTableCounter("BytesMutatedAnticompaction"); + previewedDesynchronizedTokenRanges = createTableHistogram("PreviewedDesynchronizedTokenRanges", cfs.keyspace.metric.previewedDesynchronizedTokenRanges, false); + previewedDesynchronizedBytes = createTableHistogram("PreviewedDesynchronizedBytes", cfs.keyspace.metric.previewedDesynchronizedBytes, false); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> { double bytesMutated = bytesMutatedAnticompaction.getCount(); diff --git a/src/java/org/apache/cassandra/repair/RepairJob.java b/src/java/org/apache/cassandra/repair/RepairJob.java index f20fe189f22f..947c6dac6c73 100644 --- a/src/java/org/apache/cassandra/repair/RepairJob.java +++ b/src/java/org/apache/cassandra/repair/RepairJob.java @@ -36,6 +36,13 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.*; +import org.apache.cassandra.repair.consistent.SyncStatSummary; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.repair.state.JobState; +import org.apache.cassandra.streaming.SessionSummary; +import org.apache.cassandra.utils.concurrent.AsyncFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -285,7 +292,15 @@ public void onFailure(Throwable t) logger.warn("{} {}.{} sync failed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); SystemDistributedKeyspace.failedRepairJob(session.getId(), desc.keyspace, desc.columnFamily, t); } - cfs.metric.repairsCompleted.inc(); + } + else + { + SyncStatSummary.Table summary = new SyncStatSummary.Table(cfs.keyspace.getName(), cfs.getTableName()); + 
summary.consumeStats(stats); + cfs.metric.previewedDesynchronizedTokenRanges.update(summary.getRanges()); + cfs.metric.previewedDesynchronizedBytes.update(summary.getRanges()); + } + cfs.metric.repairsCompleted.inc(); tryFailure(t instanceof NoSuchRepairSessionExceptionWrapper ? ((NoSuchRepairSessionExceptionWrapper) t).wrapped : t); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index a80a875125a9..bffff0629e12 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -59,7 +59,8 @@ public class AutoRepairConfig implements Serializable public enum RepairType implements Serializable { full, - incremental; + incremental, + preview_repaired; public static AutoRepairState getAutoRepairState(RepairType repairType) { @@ -69,6 +70,8 @@ public static AutoRepairState getAutoRepairState(RepairType repairType) return new FullRepairState(); case incremental: return new IncrementalRepairState(); + case preview_repaired: + return new PreviewRepairedState(); } throw new IllegalArgumentException("Invalid repair type: " + repairType); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 8d838a138fc9..79fe67f78ce3 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -300,6 +300,36 @@ public void resetWaitCondition() } } +class PreviewRepairedState extends AutoRepairState +{ + public PreviewRepairedState() + { + super(RepairType.preview_repaired); + } + + @Override + public RepairRunnable getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + { + RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, 
false, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, + !ranges.isEmpty(), false, false, PreviewKind.REPAIRED, false, true, false, false); + + option.getColumnFamilies().addAll(tables); + + return getRepairRunnable(keyspace, option); + } + + @Override + public void progress(String tag, ProgressEvent event) { + if (event.getType() == ProgressEventType.COMPLETE) + { + + } + + super.progress(tag, event); + } +} + class IncrementalRepairState extends AutoRepairState { public IncrementalRepairState() diff --git a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java index 855ad4bad344..035df81c99c7 100644 --- a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java +++ b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java @@ -81,7 +81,7 @@ public String toString() } } - private static class Table + public static class Table { final String keyspace; @@ -94,7 +94,7 @@ private static class Table final Map, Session> sessions = new HashMap<>(); - Table(String keyspace, String table) + public Table(String keyspace, String table) { this.keyspace = keyspace; this.table = table; @@ -119,7 +119,7 @@ void consumeStat(SyncStat stat) } } - void consumeStats(List stats) + public void consumeStats(List stats) { filter(stats, s -> s.summaries != null).forEach(this::consumeStat); } @@ -174,6 +174,14 @@ public String toString() } return output.toString(); } + + public long getBytes() { + return this.bytes; + } + + public long getRanges() { + return this.ranges; + } } private final Map, Table> summaries = new HashMap<>(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index a0e5bdc45294..c4d961f2a5fc 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -36,6 +36,10 @@ public void testGetRepairState() { state = RepairType.getAutoRepairState(RepairType.incremental); assert state instanceof IncrementalRepairState; + + state = RepairType.getAutoRepairState(RepairType.preview_repaired); + + assert state instanceof PreviewRepairedState; } @Test From 5683acd9885a81dbe5fa9ecd27ce9b3216e421b0 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Thu, 21 Nov 2024 11:14:34 -0800 Subject: [PATCH 059/257] Improve preview repair metrics --- .../cassandra/metrics/KeyspaceMetrics.java | 7 ------- .../apache/cassandra/metrics/TableMetrics.java | 8 ++++---- .../cassandra/repair/PreviewRepairTask.java | 17 +++++++++++++++++ .../org/apache/cassandra/repair/RepairJob.java | 12 +----------- .../repair/autorepair/AutoRepairState.java | 2 +- .../repair/consistent/SyncStatSummary.java | 11 ++++++++--- 6 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index b2c8f6e68f84..209c5a7a8ede 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -145,11 +145,6 @@ public class KeyspaceMetrics public final Counter outOfRangeTokenWrites; /** Lifetime count of paxos requests for keys outside the node's owned token ranges for this keyspace **/ public final Counter outOfRangeTokenPaxosRequests; - /** histogram over the number of desynchronized token ranges detected during preview repair */ - public final Histogram previewedDesynchronizedTokenRanges; - /** histogram over the number of desynchronized bytes detected during preview repair */ - public final Histogram previewedDesynchronizedBytes; - /* * Metrics for inconsistencies detected between repaired data sets across replicas. 
These @@ -282,8 +277,6 @@ public KeyspaceMetrics(final Keyspace ks) repairSyncTime = createKeyspaceTimer("RepairSyncTime"); partitionsValidated = createKeyspaceHistogram("PartitionsValidated", false); bytesValidated = createKeyspaceHistogram("BytesValidated", false); - previewedDesynchronizedTokenRanges = createKeyspaceHistogram("PreviewedDesynchronizedTokenRanges", false); - previewedDesynchronizedBytes = createKeyspaceHistogram("PreviewedDesynchronizedBytes", false); confirmedRepairedInconsistencies = createKeyspaceMeter("RepairedDataInconsistenciesConfirmed"); unconfirmedRepairedInconsistencies = createKeyspaceMeter("RepairedDataInconsistenciesUnconfirmed"); diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 672eebb1eb24..8db594b6822f 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -223,9 +223,9 @@ public class TableMetrics /** number of bytes where the whole sstable was contained in a repairing range so that we only mutated the repair status */ public final Counter bytesMutatedAnticompaction; /** number of desynchronized token ranges that were detected during preview repair */ - public final TableHistogram previewedDesynchronizedTokenRanges; + public final Counter previewedDesynchronizedTokenRanges; /** number of desynchronized bytes that were detected during preview repair */ - public final TableHistogram previewedDesynchronizedBytes; + public final Counter previewedDesynchronizedBytes; /** ratio of how much we anticompact vs how much we could mutate the repair status*/ public final Gauge mutatedAnticompactionGauge; @@ -837,8 +837,8 @@ public Long getValue() partitionsValidated = createTableHistogram("PartitionsValidated", cfs.keyspace.metric.partitionsValidated, false); bytesAnticompacted = createTableCounter("BytesAnticompacted"); bytesMutatedAnticompaction = 
createTableCounter("BytesMutatedAnticompaction"); - previewedDesynchronizedTokenRanges = createTableHistogram("PreviewedDesynchronizedTokenRanges", cfs.keyspace.metric.previewedDesynchronizedTokenRanges, false); - previewedDesynchronizedBytes = createTableHistogram("PreviewedDesynchronizedBytes", cfs.keyspace.metric.previewedDesynchronizedBytes, false); + previewedDesynchronizedTokenRanges = createTableCounter("PreviewedDesynchronizedTokenRanges"); + previewedDesynchronizedBytes = createTableCounter("PreviewedDesynchronizedBytes"); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> { double bytesMutated = bytesMutatedAnticompaction.getCount(); diff --git a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java index 872199156ee5..8eb1622719ec 100644 --- a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java +++ b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java @@ -26,6 +26,7 @@ import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -92,6 +93,7 @@ public Future performUnsafe(ExecutorPlus executor, Sche if (previewKind == PreviewKind.REPAIRED) maybeSnapshotReplicas(parentSession, keyspace, result.results.get()); // we know its present as summary used it } + emitMetrics(summary); successMessage += "; " + message; coordinator.notification(message); @@ -99,6 +101,21 @@ public Future performUnsafe(ExecutorPlus executor, Sche }); } + private void emitMetrics(SyncStatSummary summary) + { + if (!summary.isEmpty()) + RepairMetrics.previewFailures.inc(); + + summary.getTotals().forEach((key, table) -> { + if (table.isCounter()) + return; + + ColumnFamilyStore cfs = Keyspace.open(key.left).getColumnFamilyStore(key.right); + 
cfs.metric.previewedDesynchronizedTokenRanges.inc(table.getRanges()); + cfs.metric.previewedDesynchronizedBytes.inc(table.getBytes()); + }); + } + private void maybeSnapshotReplicas(TimeUUID parentSession, String keyspace, List results) { if (!DatabaseDescriptor.snapshotOnRepairedDataMismatch()) diff --git a/src/java/org/apache/cassandra/repair/RepairJob.java b/src/java/org/apache/cassandra/repair/RepairJob.java index 947c6dac6c73..7e52fafd173b 100644 --- a/src/java/org/apache/cassandra/repair/RepairJob.java +++ b/src/java/org/apache/cassandra/repair/RepairJob.java @@ -37,11 +37,9 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.*; -import org.apache.cassandra.repair.consistent.SyncStatSummary; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.repair.state.JobState; -import org.apache.cassandra.streaming.SessionSummary; import org.apache.cassandra.utils.concurrent.AsyncFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -292,15 +290,7 @@ public void onFailure(Throwable t) logger.warn("{} {}.{} sync failed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); SystemDistributedKeyspace.failedRepairJob(session.getId(), desc.keyspace, desc.columnFamily, t); } - } - else - { - SyncStatSummary.Table summary = new SyncStatSummary.Table(cfs.keyspace.getName(), cfs.getTableName()); - summary.consumeStats(stats); - cfs.metric.previewedDesynchronizedTokenRanges.update(summary.getRanges()); - cfs.metric.previewedDesynchronizedBytes.update(summary.getRanges()); - } - cfs.metric.repairsCompleted.inc(); + cfs.metric.repairsCompleted.inc(); tryFailure(t instanceof NoSuchRepairSessionExceptionWrapper ? 
((NoSuchRepairSessionExceptionWrapper) t).wrapped : t); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 79fe67f78ce3..d0c51da741ea 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -308,7 +308,7 @@ public PreviewRepairedState() } @Override - public RepairRunnable getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, diff --git a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java index 035df81c99c7..4c489e87f15e 100644 --- a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java +++ b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java @@ -119,7 +119,7 @@ void consumeStat(SyncStat stat) } } - public void consumeStats(List stats) + void consumeStats(List stats) { filter(stats, s -> s.summaries != null).forEach(this::consumeStat); } @@ -138,7 +138,7 @@ void calculateTotals() totalsCalculated = true; } - boolean isCounter() + public boolean isCounter() { TableMetadata tmd = Schema.instance.getTableMetadata(keyspace, table); return tmd != null && tmd.isCounter(); @@ -180,7 +180,7 @@ public long getBytes() { } public long getRanges() { - return this.ranges; + return this.ranges.size(); } } @@ -241,6 +241,11 @@ private void calculateTotals() totalsCalculated = true; } + public Map, Table> getTotals() { + calculateTotals(); + return summaries; + } + public String toString() { List> tables = 
Lists.newArrayList(summaries.keySet()); From 62a3526914415d7ce6eac9642958538f70155314 Mon Sep 17 00:00:00 2001 From: kzalys Date: Mon, 25 Nov 2024 20:08:49 -0800 Subject: [PATCH 060/257] Update src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java Co-authored-by: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> From 6b89ff03952fca0a7cf374434f15f74046e71055 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Tue, 3 Dec 2024 23:13:58 -0800 Subject: [PATCH 061/257] Address comments --- .../statements/schema/TableAttributes.java | 3 +++ .../cassandra/repair/PreviewRepairTask.java | 1 - .../repair/autorepair/AutoRepairState.java | 10 ---------- .../cassandra/schema/AutoRepairParams.java | 3 ++- .../cassandra/schema/SchemaKeyspace.java | 8 ++++++-- .../apache/cassandra/schema/TableParams.java | 18 ++++++++++++++++-- .../cql3/statements/DescribeStatementTest.java | 6 ++++-- .../cassandra/db/SchemaCQLHelperTest.java | 3 ++- .../AutoRepairParameterizedTest.java | 14 +++++++++----- 9 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index c8cfca9731ac..078d19d8eaff 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -204,6 +204,9 @@ private TableParams build(TableParams.Builder builder) if (hasOption(Option.REPAIR_INCREMENTAL)) builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); + if (hasOption(Option.REPAIR_PREVIEW_REPAIRED)) + builder.automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.preview_repaired, getMap(Option.REPAIR_PREVIEW_REPAIRED))); + return builder.build(); } diff --git a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java 
b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java index 8eb1622719ec..8de6c72d56b8 100644 --- a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java +++ b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java @@ -89,7 +89,6 @@ public Future performUnsafe(ExecutorPlus executor, Sche else { message = (previewKind == PreviewKind.REPAIRED ? "Repaired data is inconsistent\n" : "Preview complete\n") + summary; - RepairMetrics.previewFailures.inc(); if (previewKind == PreviewKind.REPAIRED) maybeSnapshotReplicas(parentSession, keyspace, result.results.get()); // we know its present as summary used it } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index d0c51da741ea..a52e07da84a0 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -318,16 +318,6 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, return getRepairRunnable(keyspace, option); } - - @Override - public void progress(String tag, ProgressEvent event) { - if (event.getType() == ProgressEventType.COMPLETE) - { - - } - - super.progress(tag, event); - } } class IncrementalRepairState extends AutoRepairState diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index ea3802db93d2..aa6082ae7a51 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -46,7 +46,8 @@ public String toString() public static final Map> DEFAULT_OPTIONS = ImmutableMap.of(AutoRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - AutoRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); + AutoRepairConfig.RepairType.incremental, 
ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), + AutoRepairConfig.RepairType.preview_repaired, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); public final AutoRepairConfig.RepairType type; diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index ffeb4580eba5..d335403fdc65 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -135,6 +135,7 @@ private SchemaKeyspace() + "fast_path frozen>," + "repair_full frozen>," + "repair_incremental frozen>," + + "repair_preview_repaired frozen>," + "PRIMARY KEY ((keyspace_name), table_name))"); private static final TableMetadata Columns = @@ -221,6 +222,7 @@ private SchemaKeyspace() + "read_repair text," + "repair_full frozen>," + "repair_incremental frozen>," + + "repair_preview_repaired frozen>," + "PRIMARY KEY ((keyspace_name), view_name))"); private static final TableMetadata Indexes = @@ -599,7 +601,8 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("read_repair", params.readRepair.toString()) .add("extensions", params.extensions) .add("repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) - .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()) + .add("repair_preview_repaired", params.automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired).asMap()); // Only add CDC-enabled flag to schema if it's enabled on the node. 
This is to work around RTE's post-8099 if a 3.8+ @@ -1091,7 +1094,8 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) - .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))); + .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))) + .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.preview_repaired, row.getFrozenTextMap("repair_preview_repaired"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 2c3cfe4daf28..629d84532097 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -104,7 +104,9 @@ public enum Option TRANSACTIONAL_MIGRATION_FROM, PENDING_DROP, REPAIR_FULL, - REPAIR_INCREMENTAL; + REPAIR_INCREMENTAL, + REPAIR_PREVIEW_REPAIRED, + ; @Override public String toString() @@ -172,6 +174,7 @@ private TableParams(Builder builder) { put(AutoRepairConfig.RepairType.full, builder.automatedRepairFull); put(AutoRepairConfig.RepairType.incremental, builder.automatedRepairIncremental); + put(AutoRepairConfig.RepairType.preview_repaired, builder.automatedRepairPreviewRepaired); } }; } @@ -208,6 +211,7 @@ public static Builder builder(TableParams params) .pendingDrop(params.pendingDrop) .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.full)) .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.incremental)) + 
.automatedRepairPreviewRepaired(params.automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired)) ; } @@ -374,6 +378,7 @@ public String toString() .add(PENDING_DROP.toString(), pendingDrop) .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) .add(Option.REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.incremental)) + .add(Option.REPAIR_PREVIEW_REPAIRED.toString(), automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired)) .toString(); } @@ -438,7 +443,9 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) .newLine() .append("AND repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) .newLine() - .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()); + .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()) + .newLine() + .append("AND repair_preview_repaired = ").append(automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired).asMap()); } public static final class Builder @@ -469,6 +476,7 @@ public static final class Builder private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.full); private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.incremental); + private AutoRepairParams automatedRepairPreviewRepaired = new AutoRepairParams(AutoRepairConfig.RepairType.preview_repaired); public Builder() { @@ -628,6 +636,12 @@ public Builder automatedRepairIncremental(AutoRepairParams val) automatedRepairIncremental = val; return this; } + + public Builder automatedRepairPreviewRepaired(AutoRepairParams val) + { + automatedRepairPreviewRepaired = val; + return this; + } } public static class Serializer implements MetadataSerializer diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java 
b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 7bcb63c15e1c..c5e5e2e70ee2 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1145,7 +1145,8 @@ private static String tableParametersCql() " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = '99p'\n" + " AND repair_full = {'enabled': 'true'}\n" + - " AND repair_incremental = {'enabled': 'true'};"; + " AND repair_incremental = {'enabled': 'true'}\n" + + " AND repair_preview_repaired = {'enabled': 'true'};"; } private static String cqlQuoted(Map map) @@ -1174,7 +1175,8 @@ private static String mvParametersCql() " AND read_repair = 'BLOCKING'\n" + " AND speculative_retry = '99p'\n" + " AND repair_full = {'enabled': 'true'}\n" + - " AND repair_incremental = {'enabled': 'true'};"; + " AND repair_incremental = {'enabled': 'true'}\n" + + " AND repair_preview_repaired = {'enabled': 'true'};"; } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index 14688e6f628f..8a6e2d7fbf56 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -351,7 +351,8 @@ public void testCfmOptionsCQL() " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = 'ALWAYS'\n" + " AND repair_full = {'enabled': 'true'}\n" + - " AND repair_incremental = {'enabled': 'true'};" + " AND repair_incremental = {'enabled': 'true'}\n" + + " AND repair_preview_repaired = {'enabled': 'true'};" )); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index c7533c6dba0e..0b8a378dcc97 100644 --- 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -128,7 +128,7 @@ public void setup() SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); - QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'} AND repair_preview_repaired = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); @@ -555,15 +555,19 @@ public void testTableAttribute() { assertTrue(TableAttributes.validKeywords().contains("repair_full")); assertTrue(TableAttributes.validKeywords().contains("repair_incremental")); + assertTrue(TableAttributes.validKeywords().contains("repair_preview_repaired")); } @Test public void testDefaultAutomatedRepair() { - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - 
Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values() ) + { + Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), + cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), + cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + } } @Test From f21377aa691ca302ef24e9cb6bf76118d842b48e Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Tue, 3 Dec 2024 23:31:03 -0800 Subject: [PATCH 062/257] Fix preview repair options for system_schema ks --- src/java/org/apache/cassandra/schema/SchemaKeyspace.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index d335403fdc65..d80eef46cb59 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -1095,7 +1095,7 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .fastPath(getFastPathStrategy(row)) .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))) - .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.preview_repaired, row.getFrozenTextMap("repair_preview_repaired"))); + .automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.preview_repaired, row.getFrozenTextMap("repair_preview_repaired"))); // allow_auto_snapshot column was 
introduced in 4.2 if (row.has("allow_auto_snapshot")) From f61c7989cfe1a60c51eb738b9ebd28ab9ad1a064 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Wed, 4 Dec 2024 15:23:50 -0800 Subject: [PATCH 063/257] Implement BytesPreviewed and TokenRangesPreviewed metrics --- .../db/compaction/CompactionManager.java | 4 ++-- .../cassandra/metrics/TableMetrics.java | 22 ++++++++++++------- .../cassandra/repair/PreviewRepairTask.java | 4 ++-- .../cassandra/repair/ValidationManager.java | 6 +++++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java index 7e052cd8c412..a36d0fc49b07 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java @@ -1015,7 +1015,7 @@ private static void mutateFullyContainedSSTables(ColumnFamilyStore cfs, Set fullyContainedSSTables = findSSTablesToAnticompact(sstableIterator, normalizedRanges, sessionID); - cfs.metric.bytesMutatedAnticompaction.inc(SSTableReader.getTotalBytes(fullyContainedSSTables)); + cfs.metric.bytesMutatedAnticompaction.mark(SSTableReader.getTotalBytes(fullyContainedSSTables)); cfs.getCompactionStrategyManager().mutateRepaired(fullyContainedSSTables, UNREPAIRED_SSTABLE, sessionID, isTransient); // since we're just re-writing the sstable metdata for the fully contained sstables, we don't want // them obsoleted when the anti-compaction is complete. So they're removed from the transaction here @@ -1862,7 +1862,7 @@ private void doAntiCompaction(ColumnFamilyStore cfs, // repairedAt values for these, we still avoid anti-compacting already repaired sstables, as we currently don't // make use of any actual repairedAt value and splitting up sstables just for that is not worth it at this point. 
Set unrepairedSSTables = sstables.stream().filter((s) -> !s.isRepaired()).collect(Collectors.toSet()); - cfs.metric.bytesAnticompacted.inc(SSTableReader.getTotalBytes(unrepairedSSTables)); + cfs.metric.bytesAnticompacted.mark(SSTableReader.getTotalBytes(unrepairedSSTables)); Collection> groupedSSTables = cfs.getCompactionStrategyManager().groupSSTablesForAntiCompaction(unrepairedSSTables); // iterate over sstables to check if the full / transient / unrepaired ranges intersect them. diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 8db594b6822f..b49c8e73aeba 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -219,13 +219,17 @@ public class TableMetrics /** number of partitions read creating merkle trees */ public final TableHistogram partitionsValidated; /** number of bytes read while doing anticompaction */ - public final Counter bytesAnticompacted; + public final Meter bytesAnticompacted; /** number of bytes where the whole sstable was contained in a repairing range so that we only mutated the repair status */ - public final Counter bytesMutatedAnticompaction; + public final Meter bytesMutatedAnticompaction; + /** number of bytes that were scanned during preview repair */ + public final Meter bytesPreviewed; + /** number of token ranges that were scanned during preview repair */ + public final Meter tokenRangesPreviewed; /** number of desynchronized token ranges that were detected during preview repair */ - public final Counter previewedDesynchronizedTokenRanges; + public final Meter previewedDesynchronizedTokenRanges; /** number of desynchronized bytes that were detected during preview repair */ - public final Counter previewedDesynchronizedBytes; + public final Meter previewedDesynchronizedBytes; /** ratio of how much we anticompact vs how much we could mutate the repair status*/ public final Gauge 
mutatedAnticompactionGauge; @@ -835,10 +839,12 @@ public Long getValue() bytesValidated = createTableHistogram("BytesValidated", cfs.keyspace.metric.bytesValidated, false); partitionsValidated = createTableHistogram("PartitionsValidated", cfs.keyspace.metric.partitionsValidated, false); - bytesAnticompacted = createTableCounter("BytesAnticompacted"); - bytesMutatedAnticompaction = createTableCounter("BytesMutatedAnticompaction"); - previewedDesynchronizedTokenRanges = createTableCounter("PreviewedDesynchronizedTokenRanges"); - previewedDesynchronizedBytes = createTableCounter("PreviewedDesynchronizedBytes"); + bytesAnticompacted = createTableMeter("BytesAnticompacted"); + bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction"); + bytesPreviewed = createTableMeter("BytesPreviewed"); + tokenRangesPreviewed = createTableMeter("TokenRangesPreviewed"); + previewedDesynchronizedTokenRanges = createTableMeter("PreviewedDesynchronizedTokenRanges"); + previewedDesynchronizedBytes = createTableMeter("PreviewedDesynchronizedBytes"); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> { double bytesMutated = bytesMutatedAnticompaction.getCount(); diff --git a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java index 8de6c72d56b8..156be931da84 100644 --- a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java +++ b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java @@ -110,8 +110,8 @@ private void emitMetrics(SyncStatSummary summary) return; ColumnFamilyStore cfs = Keyspace.open(key.left).getColumnFamilyStore(key.right); - cfs.metric.previewedDesynchronizedTokenRanges.inc(table.getRanges()); - cfs.metric.previewedDesynchronizedBytes.inc(table.getBytes()); + cfs.metric.previewedDesynchronizedTokenRanges.mark(table.getRanges()); + cfs.metric.previewedDesynchronizedBytes.mark(table.getBytes()); }); } diff --git 
a/src/java/org/apache/cassandra/repair/ValidationManager.java b/src/java/org/apache/cassandra/repair/ValidationManager.java index ca7ad3a68eea..ff38073bd290 100644 --- a/src/java/org/apache/cassandra/repair/ValidationManager.java +++ b/src/java/org/apache/cassandra/repair/ValidationManager.java @@ -38,6 +38,7 @@ import org.apache.cassandra.metrics.TopPartitionTracker; import org.apache.cassandra.repair.state.ValidationState; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTree; import org.apache.cassandra.utils.MerkleTrees; @@ -143,6 +144,11 @@ public static void doValidation(ColumnFamilyStore cfs, Validator validator) thro { cfs.metric.bytesValidated.update(state.estimatedTotalBytes); cfs.metric.partitionsValidated.update(state.partitionsProcessed); + if (validator.getPreviewKind() != PreviewKind.NONE) + { + cfs.metric.tokenRangesPreviewed.mark(validator.desc.ranges.size()); + cfs.metric.bytesPreviewed.mark(state.estimatedTotalBytes); + } if (topPartitionCollector != null) cfs.topPartitions.merge(topPartitionCollector); } From a560d487b85b97a58abfeace4b284343ed4f59b4 Mon Sep 17 00:00:00 2001 From: kzalys Date: Fri, 6 Dec 2024 11:17:37 -0800 Subject: [PATCH 064/257] Update test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java Co-authored-by: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> --- .../repair/autorepair/AutoRepairParameterizedTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 0b8a378dcc97..8e80af454e2b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -561,7 
+561,7 @@ public void testTableAttribute() @Test public void testDefaultAutomatedRepair() { - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values() ) + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); From 03d0410caef15931024b9f6e30b0894f235bbda2 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 6 Dec 2024 13:13:12 -0800 Subject: [PATCH 065/257] Address comments --- src/java/org/apache/cassandra/metrics/TableMetrics.java | 8 ++++---- .../org/apache/cassandra/repair/PreviewRepairTask.java | 4 ++-- .../cassandra/repair/consistent/SyncStatSummary.java | 9 ++++++--- .../repair/autorepair/AutoRepairStateFactoryTest.java | 7 ++++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index b49c8e73aeba..c3e70860bbb1 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -227,9 +227,9 @@ public class TableMetrics /** number of token ranges that were scanned during preview repair */ public final Meter tokenRangesPreviewed; /** number of desynchronized token ranges that were detected during preview repair */ - public final Meter previewedDesynchronizedTokenRanges; + public final Meter tokenRangesPreviewedDesynchronized; /** number of desynchronized bytes that were detected during preview repair */ - public final Meter previewedDesynchronizedBytes; + public final Meter bytesPreviewedDesynchronized; /** ratio of how much we anticompact vs how much we could mutate the repair status*/ public final Gauge mutatedAnticompactionGauge; @@ -843,8 +843,8 @@ public Long getValue() bytesMutatedAnticompaction = 
createTableMeter("BytesMutatedAnticompaction"); bytesPreviewed = createTableMeter("BytesPreviewed"); tokenRangesPreviewed = createTableMeter("TokenRangesPreviewed"); - previewedDesynchronizedTokenRanges = createTableMeter("PreviewedDesynchronizedTokenRanges"); - previewedDesynchronizedBytes = createTableMeter("PreviewedDesynchronizedBytes"); + tokenRangesPreviewedDesynchronized = createTableMeter("TokenRangesPreviewedDesynchronized"); + bytesPreviewedDesynchronized = createTableMeter("BytesPreviewedDesynchronized"); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> { double bytesMutated = bytesMutatedAnticompaction.getCount(); diff --git a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java index 156be931da84..6323d8e2751d 100644 --- a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java +++ b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java @@ -110,8 +110,8 @@ private void emitMetrics(SyncStatSummary summary) return; ColumnFamilyStore cfs = Keyspace.open(key.left).getColumnFamilyStore(key.right); - cfs.metric.previewedDesynchronizedTokenRanges.mark(table.getRanges()); - cfs.metric.previewedDesynchronizedBytes.mark(table.getBytes()); + cfs.metric.tokenRangesPreviewedDesynchronized.mark(table.getRanges()); + cfs.metric.bytesPreviewedDesynchronized.mark(table.getBytes()); }); } diff --git a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java index 4c489e87f15e..820c6b011ba6 100644 --- a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java +++ b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java @@ -175,11 +175,13 @@ public String toString() return output.toString(); } - public long getBytes() { + public long getBytes() + { return this.bytes; } - public long getRanges() { + public long getRanges() + { return this.ranges.size(); } 
} @@ -241,7 +243,8 @@ private void calculateTotals() totalsCalculated = true; } - public Map, Table> getTotals() { + public Map, Table> getTotals() + { calculateTotals(); return summaries; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index c4d961f2a5fc..3acdd313e52d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; public class AutoRepairStateFactoryTest { @@ -31,15 +32,15 @@ public class AutoRepairStateFactoryTest public void testGetRepairState() { AutoRepairState state = RepairType.getAutoRepairState(RepairType.full); - assert state instanceof FullRepairState; + assertTrue(state instanceof FullRepairState); state = RepairType.getAutoRepairState(RepairType.incremental); - assert state instanceof IncrementalRepairState; + assertTrue(state instanceof IncrementalRepairState); state = RepairType.getAutoRepairState(RepairType.preview_repaired); - assert state instanceof PreviewRepairedState; + assertTrue(state instanceof PreviewRepairedState); } @Test From 35fb10014170b7ad6cf094ca9a76218bc2ebc079 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 6 Dec 2024 13:20:20 -0800 Subject: [PATCH 066/257] Update metrics docs --- .../cassandra/pages/managing/operating/metrics.adoc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 2b3e2b75efb4..0dcc3dd60c83 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -249,12 
+249,20 @@ during validation. |PartitionsValidated |Histogram |Histogram over the number of partitions read during validation. -|BytesAnticompacted |Counter |How many bytes we anticompacted. +|BytesAnticompacted |Meter |How many bytes we anticompacted. -|BytesMutatedAnticompaction |Counter |How many bytes we avoided +|BytesMutatedAnticompaction |Meter |How many bytes we avoided anticompacting because the sstable was fully contained in the repaired range. +|BytesPreviewed |Meter |How many bytes we scanned during preview repair. + +|TokenRangesPreviewed |Meter |How many token ranges we scanned during preview repair. + +|BytesPreviewedDesynchronized |Meter |How many desynchronized bytes we found during preview repair. + +|TokenRangesPreviewedDesynchronized |Meter |How many desynchronized token ranges we found during preview repair. + |MutatedAnticompactionGauge |Gauge |Ratio of bytes mutated vs total bytes repaired. |=== From f444067c70971ef8604003d58fb497a76f0cf6ff Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 6 Dec 2024 15:01:37 -0800 Subject: [PATCH 067/257] Update metrics --- .../cassandra/pages/managing/operating/metrics.adoc | 9 ++++----- src/java/org/apache/cassandra/metrics/TableMetrics.java | 3 --- src/java/org/apache/cassandra/repair/RepairJob.java | 1 + .../org/apache/cassandra/repair/ValidationManager.java | 1 - 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 0dcc3dd60c83..eb9ce8c8d83f 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -255,13 +255,12 @@ read during validation. anticompacting because the sstable was fully contained in the repaired range. -|BytesPreviewed |Meter |How many bytes we scanned during preview repair. 
+|BytesPreviewed |Meter |Estimated number of bytes that were scanned for local replica during preview repair -|TokenRangesPreviewed |Meter |How many token ranges we scanned during preview repair. +|BytesPreviewedDesynchronized |Meter |Number of desynchronized bytes that were detected among all replicas during preview repair -|BytesPreviewedDesynchronized |Meter |How many desynchronized bytes we found during preview repair. - -|TokenRangesPreviewedDesynchronized |Meter |How many desynchronized token ranges we found during preview repair. +|TokenRangesPreviewedDesynchronized |Meter |Number of token ranges among all replicas where desynchronization was found +during preview repair. These ranges would need to be streamed during subsequent repair. |MutatedAnticompactionGauge |Gauge |Ratio of bytes mutated vs total bytes repaired. diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index c3e70860bbb1..3aa478fbbb05 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -224,8 +224,6 @@ public class TableMetrics public final Meter bytesMutatedAnticompaction; /** number of bytes that were scanned during preview repair */ public final Meter bytesPreviewed; - /** number of token ranges that were scanned during preview repair */ - public final Meter tokenRangesPreviewed; /** number of desynchronized token ranges that were detected during preview repair */ public final Meter tokenRangesPreviewedDesynchronized; /** number of desynchronized bytes that were detected during preview repair */ @@ -842,7 +840,6 @@ public Long getValue() bytesAnticompacted = createTableMeter("BytesAnticompacted"); bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction"); bytesPreviewed = createTableMeter("BytesPreviewed"); - tokenRangesPreviewed = createTableMeter("TokenRangesPreviewed"); tokenRangesPreviewedDesynchronized = 
createTableMeter("TokenRangesPreviewedDesynchronized"); bytesPreviewedDesynchronized = createTableMeter("BytesPreviewedDesynchronized"); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> diff --git a/src/java/org/apache/cassandra/repair/RepairJob.java b/src/java/org/apache/cassandra/repair/RepairJob.java index 7e52fafd173b..583e0e26991c 100644 --- a/src/java/org/apache/cassandra/repair/RepairJob.java +++ b/src/java/org/apache/cassandra/repair/RepairJob.java @@ -37,6 +37,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.*; + import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.repair.state.JobState; diff --git a/src/java/org/apache/cassandra/repair/ValidationManager.java b/src/java/org/apache/cassandra/repair/ValidationManager.java index ff38073bd290..c028102af1a9 100644 --- a/src/java/org/apache/cassandra/repair/ValidationManager.java +++ b/src/java/org/apache/cassandra/repair/ValidationManager.java @@ -146,7 +146,6 @@ public static void doValidation(ColumnFamilyStore cfs, Validator validator) thro cfs.metric.partitionsValidated.update(state.partitionsProcessed); if (validator.getPreviewKind() != PreviewKind.NONE) { - cfs.metric.tokenRangesPreviewed.mark(validator.desc.ranges.size()); cfs.metric.bytesPreviewed.mark(state.estimatedTotalBytes); } if (topPartitionCollector != null) From 2cca0869210279f0f1c04f09acea505b105271d0 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 3 Nov 2024 13:10:20 -0600 Subject: [PATCH 068/257] UnrepairedBytesBasedTokenRangeSplitter prototype Adds prototype UnrepairedBytesBasedTokenRangeSplitter that demonstrates consuming splitter-based properties to limit the repair assignments returned to be bound to the requested amount of work. 
--- .../io/sstable/format/SSTableScanner.java | 10 + .../sstable/format/big/BigTableScanner.java | 45 ++- ...nrepairedBytesBasedTokenRangeSplitter.java | 337 ++++++++++++++++++ 3 files changed, 387 insertions(+), 5 deletions(-) create mode 100644 src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java index 28035a85da0b..001152d66d11 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -35,6 +36,7 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.AbstractBounds.Boundary; import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.AbstractRowIndexEntry; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.ISSTableScanner; @@ -83,6 +85,14 @@ protected SSTableScanner(S sstable, this.listener = listener; } + public static List> makeBounds(SSTableReader sstable, Collection> tokenRanges) + { + List> boundsList = new ArrayList<>(tokenRanges.size()); + for (Range range : Range.normalize(tokenRanges)) + addRange(sstable, Range.makeRowRange(range), boundsList); + return boundsList; + } + protected static List> makeBounds(SSTableReader sstable, DataRange dataRange) { List> boundsList = new ArrayList<>(2); diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java index 83243529c4c9..b1e632e5b44c 100644 --- 
a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.Iterator; +import javax.annotation.Nullable; + import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; @@ -64,9 +66,12 @@ private BigTableScanner(BigTableReader sstable, this.rowIndexEntrySerializer = new RowIndexEntry.Serializer(sstable.descriptor.version, sstable.header, sstable.owner().map(SSTable.Owner::getMetrics).orElse(null)); } - private void seekToCurrentRangeStart() + // Helper method to seek to the index for the given position or range and optionally in the data file + private long seekAndProcess(@Nullable PartitionPosition position, + @Nullable AbstractBounds range, + boolean seekDataFile) throws CorruptSSTableException { - long indexPosition = sstable.getIndexScanPosition(currentRange.left); + long indexPosition = sstable.getIndexScanPosition(position); ifile.seek(indexPosition); try { @@ -75,13 +80,21 @@ private void seekToCurrentRangeStart() { indexPosition = ifile.getFilePointer(); DecoratedKey indexDecoratedKey = sstable.decorateKey(ByteBufferUtil.readWithShortLength(ifile)); - if (indexDecoratedKey.compareTo(currentRange.left) > 0 || currentRange.contains(indexDecoratedKey)) + + // Whether the position or range is present in the SSTable. + boolean isFound = (range == null && indexDecoratedKey.compareTo(position) > 0) + || (range != null && (indexDecoratedKey.compareTo(range.left) > 0 || range.contains(indexDecoratedKey))); + if (isFound) { // Found, just read the dataPosition and seek into index and data files long dataPosition = RowIndexEntry.Serializer.readPosition(ifile); + // seek the index file position as we will presumably seek further when going to the next position. 
ifile.seek(indexPosition); - dfile.seek(dataPosition); - break; + if (seekDataFile) + { + dfile.seek(dataPosition); + } + return dataPosition; } else { @@ -94,6 +107,28 @@ private void seekToCurrentRangeStart() sstable.markSuspect(); throw new CorruptSSTableException(e, sstable.getFilename()); } + // If for whatever reason we don't find position in file, just return 0 + return 0L; + } + + /** + * Seeks to the start of the current range and updates both the index and data file positions. + */ + private void seekToCurrentRangeStart() throws CorruptSSTableException + { + seekAndProcess(currentRange.left, currentRange, true); + } + + /** + * Gets the position in the data file, but does not seek to it. This does seek the index to find the data position + * but does not actually seek the data file. + * @param position position to find in data file. + * @return offset in data file where position exists. + * @throws CorruptSSTableException if SSTable was malformed + */ + public long getDataPosition(PartitionPosition position) throws CorruptSSTableException + { + return seekAndProcess(position, null, false); } protected void doClose() throws IOException diff --git a/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java new file mode 100644 index 000000000000..953fa57faf99 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.config.DataStorageSpec; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; +import org.apache.cassandra.db.lifecycle.SSTableSet; +import org.apache.cassandra.db.lifecycle.View; +import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.sstable.format.big.BigTableReader; +import org.apache.cassandra.io.sstable.format.big.BigTableScanner; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.concurrent.Refs; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.splitEvenly; + +public class UnrepairedBytesBasedTokenRangeSplitter implements IAutoRepairTokenRangeSplitter +{ + private static final Logger logger = LoggerFactory.getLogger(UnrepairedBytesBasedTokenRangeSplitter.class); + + static final String SUBRANGE_SIZE = "subrange_size"; + static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; + + // target bytes per subrange + private final 
DataStorageSpec.LongBytesBound subrangeSize; + + // maximum target bytes to repair + private final DataStorageSpec.LongBytesBound maxBytesPerSchedule; + + private final long subrangeBytes; + + private final long maxBytesPerScheduleBytes; + + private static final DataStorageSpec.LongBytesBound DEFAULT_SUBRANGE_SIZE = new DataStorageSpec.LongBytesBound("100GiB"); + private static final DataStorageSpec.LongBytesBound DEFAULT_MAX_BYTES_PER_SCHEDULE = new DataStorageSpec.LongBytesBound("500GiB"); + + public UnrepairedBytesBasedTokenRangeSplitter(Map parameters) + { + // Demonstrates parameterizing a range splitter so we can have splitter specific options. + if (parameters.containsKey(SUBRANGE_SIZE)) + { + subrangeSize = new DataStorageSpec.LongBytesBound(parameters.get(SUBRANGE_SIZE)); + } + else + { + subrangeSize = DEFAULT_SUBRANGE_SIZE; + } + subrangeBytes = subrangeSize.toBytes(); + + if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) + { + maxBytesPerSchedule = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)); + } + else + { + maxBytesPerSchedule = DEFAULT_MAX_BYTES_PER_SCHEDULE; + } + maxBytesPerScheduleBytes = maxBytesPerSchedule.toBytes(); + + logger.info("Configured {} with {}={}, {}={}", UnrepairedBytesBasedTokenRangeSplitter.class.getName(), + SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule); + } + + @Override + public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + { + List repairAssignments = new ArrayList<>(); + + logger.info("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspaceName, tableNames); + if (repairType != AutoRepairConfig.RepairType.incremental) + { + throw new IllegalArgumentException(this.getClass().getName() + " only supports " + AutoRepairConfig.RepairType.incremental + " repair"); + } + + // TODO: create a custom repair 
assignment that indicates number of bytes in repair and join tables by byte size. + Collection> tokenRanges = getTokenRanges(primaryRangeOnly, keyspaceName); + for (String tableName : tableNames) + { + repairAssignments.addAll(getRepairAssignmentsForTable(keyspaceName, tableName, tokenRanges)); + } + return repairAssignments; + } + + public List getRepairAssignmentsForTable(String keyspaceName, String tableName, Collection> tokenRanges) + { + List repairAssignments = new ArrayList<>(); + + long targetBytesSoFar = 0; + + for (Range tokenRange : tokenRanges) + { + logger.info("Calculating unrepaired bytes for {}.{} for range {}", keyspaceName, tableName, tokenRange); + // Capture the amount of unrepaired bytes for range + long approximateUnrepairedBytesForRange = 0L; + // Capture the total bytes in read sstables, this will be useful for calculating the ratio + // of data in SSTables including this range and also useful to know how much anticompaction there will be. + long totalBytesInUnrepairedSSTables = 0L; + try (Refs refs = getSSTableReaderRefs(keyspaceName, tableName, tokenRange)) + { + for (SSTableReader reader : refs) + { + // Only evaluate unrepaired SSTables. + if (!reader.isRepaired()) + { + long sstableSize = reader.bytesOnDisk(); + totalBytesInUnrepairedSSTables += sstableSize; + // get the bounds of the sstable for this range using the index file but do not actually read it. 
+ List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); + try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, Collections.singleton(tokenRange))) + { + assert bounds.size() == 1; + + AbstractBounds bound = bounds.get(0); + long startPosition = scanner.getDataPosition(bound.left); + long endPosition = scanner.getDataPosition(bound.right); + // If end position is 0 we can assume the sstable ended before that token, bound at size of file + if (endPosition == 0) + { + endPosition = sstableSize; + } + + long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); + // get the fraction of the sstable belonging to the range. + approximateUnrepairedBytesForRange += Math.min(approximateRangeBytesInSSTable, sstableSize); + double ratio = approximateRangeBytesInSSTable / (double) sstableSize; + logger.info("Calculations for {}.{} {}: sstableSize={}, rangeBytesInSSTable={}, startPosition={}, endPosition={}, ratio={}", + keyspaceName, tableName, reader.descriptor.baseFile().name(), + FileUtils.stringifyFileSize(sstableSize), FileUtils.stringifyFileSize(approximateRangeBytesInSSTable), startPosition, endPosition, ratio); + } + } + else + { + logger.info("Skipping over {}.{} {} ({}) because it is repaired", keyspaceName, tableName, reader.descriptor.baseFile().name(), FileUtils.stringifyFileSize(reader.bytesOnDisk())); + } + } + } + + // Only consider token range if it had unrepaired sstables or live data in memtables. + if (totalBytesInUnrepairedSSTables > 0L) + { + // TODO: Possibly some anticompaction configuration we want here, where if we detect a large amount of anticompaction we want to reduce the work we do. 
+ double ratio = approximateUnrepairedBytesForRange / (double) totalBytesInUnrepairedSSTables; + logger.info("Calculated unrepaired bytes for {}.{} for range {}: sstableSize={}, rangeBytesInSSTables={}, ratio={}", keyspaceName, tableName, tokenRange, + FileUtils.stringifyFileSize(totalBytesInUnrepairedSSTables), FileUtils.stringifyFileSize(approximateUnrepairedBytesForRange), ratio); + + // TODO: split on byte size here, this is currently a bit naive in assuming that data is evenly distributed among the range which may not be the + // right assumption. May want to consider when splitting on these ranges to reevaluate how much data is in the range, but for this + // exists as a demonstration. + if (approximateUnrepairedBytesForRange < subrangeBytes) + { + // accept range as is if less than bytes. + logger.info("Using 1 repair assignment for {}.{} for range {} as {} is less than {}", keyspaceName, tableName, tokenRange, + FileUtils.stringifyFileSize(approximateUnrepairedBytesForRange), subrangeSize); + // TODO: this is a bit repetitive see if can reduce more. + RepairAssignment assignment = new BytesBasedRepairAssignment(tokenRange, keyspaceName, Collections.singletonList(tableName), approximateUnrepairedBytesForRange); + if (canAddAssignment(assignment, targetBytesSoFar, approximateUnrepairedBytesForRange)) + { + repairAssignments.add(assignment); + targetBytesSoFar += approximateUnrepairedBytesForRange; + } + else + return repairAssignments; + } + else + { + long targetRanges = approximateUnrepairedBytesForRange / subrangeBytes; + // TODO: approximation per range, this is a bit lossy since targetRanges rounds down. 
+ long approximateBytesPerSplit = approximateUnrepairedBytesForRange / targetRanges; + logger.info("Splitting {}.{} for range {} into {} sub ranges, approximateBytesPerSplit={}", keyspaceName, tableName, tokenRange, targetRanges, FileUtils.stringifyFileSize(approximateBytesPerSplit)); + List> splitRanges = splitEvenly(tokenRange, (int) targetRanges); + int splitRangeCount = 0; + for (Range splitRange : splitRanges) + { + RepairAssignment assignment = new BytesBasedRepairAssignment(splitRange, keyspaceName, Collections.singletonList(tableName), approximateBytesPerSplit); + if (canAddAssignment(assignment, targetBytesSoFar, approximateBytesPerSplit)) + { + logger.info("Added repair assignment for {}.{} for subrange {} (#{}/{}) with approximateBytes={}", + keyspaceName, tableName, splitRange, ++splitRangeCount, splitRanges.size(), FileUtils.stringifyFileSize(approximateBytesPerSplit)); + repairAssignments.add(assignment); + targetBytesSoFar += approximateBytesPerSplit; + } + else + return repairAssignments; + } + } + } + else + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); + if (memtableSize > 0L) + { + logger.info("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", keyspaceName, tableName, tokenRange, memtableSize); + RepairAssignment assignment = new BytesBasedRepairAssignment(tokenRange, keyspaceName, Collections.singletonList(tableName), memtableSize); + if (targetBytesSoFar >= maxBytesPerScheduleBytes) + { + return repairAssignments; + } + repairAssignments.add(assignment); + targetBytesSoFar += memtableSize; + } + else + { + logger.info("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", keyspaceName, tableName, tokenRange); + } + } + } + return repairAssignments; + } + + private boolean canAddAssignment(RepairAssignment repairAssignment, long 
targetBytesSoFar, long bytesToBeAdded) + { + if (targetBytesSoFar + bytesToBeAdded < maxBytesPerScheduleBytes) + { + return true; + } + logger.warn("Refusing to add {} with a target size of {} because it would increase total repair bytes to {} which is greater than {}={}", + repairAssignment, FileUtils.stringifyFileSize(bytesToBeAdded), FileUtils.stringifyFileSize(targetBytesSoFar + bytesToBeAdded), MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule); + return false; + } + + public Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) + { + // Collect all applicable token ranges + Collection> wrappedRanges; + if (primaryRangeOnly) + { + wrappedRanges = StorageService.instance.getPrimaryRanges(keyspaceName); + } + else + { + wrappedRanges = StorageService.instance.getLocalRanges(keyspaceName); + } + + // Unwrap each range as we need to account for ranges that overlap the ring + Collection> ranges = new ArrayList<>(); + for (Range wrappedRange : wrappedRanges) + { + ranges.addAll(wrappedRange.unwrap()); + } + + return ranges; + } + + public Refs getSSTableReaderRefs(String keyspaceName, String tableName, Range tokenRange) + { + final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + + if (cfs == null) + { + throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, tableName)); + } + + Iterable sstables = cfs.getTracker().getView().select(SSTableSet.CANONICAL); + SSTableIntervalTree tree = SSTableIntervalTree.build(sstables); + Range r = Range.makeRowRange(tokenRange); + Iterable canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); + + // TODO: may need to reason about this not working. 
+ return Refs.ref(canonicalSSTables); + } + + public static class BytesBasedRepairAssignment extends RepairAssignment + { + private final long approximateBytes; + + public BytesBasedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, long approximateBytes) + { + super(tokenRange, keyspaceName, tableNames); + this.approximateBytes = approximateBytes; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + BytesBasedRepairAssignment that = (BytesBasedRepairAssignment) o; + return approximateBytes == that.approximateBytes; + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), approximateBytes); + } + + @Override + public String toString() + { + return "BytesBasedRepairAssignment{" + + "keyspaceName='" + keyspaceName + '\'' + + ", approximateBytes=" + approximateBytes + + ", tokenRange=" + tokenRange + + ", tableNames=" + tableNames + + '}'; + } + + public long getApproximateBytes() + { + return approximateBytes; + } + } +} From 51eb7fc78d6decb36d0ba70c66b552822ddf7480 Mon Sep 17 00:00:00 2001 From: Chris Lohfink Date: Thu, 7 Nov 2024 15:38:16 -0600 Subject: [PATCH 069/257] Add Repair range splitter --- .../statements/schema/TableAttributes.java | 6 +- .../repair/autorepair/AutoRepairConfig.java | 12 +- .../repair/autorepair/AutoRepairState.java | 6 +- .../repair/autorepair/AutoRepairUtils.java | 35 +- .../DefaultAutoRepairTokenSplitter.java | 5 +- .../autorepair/RepairRangeSplitter.java | 396 ++++++++++++++++++ ...nrepairedBytesBasedTokenRangeSplitter.java | 8 +- .../cassandra/schema/AutoRepairParams.java | 6 +- .../cassandra/schema/SchemaKeyspace.java | 12 +- .../apache/cassandra/schema/TableParams.java | 30 +- .../cassandra/service/AutoRepairService.java | 2 +- .../test/repair/AutoRepairSchedulerTest.java | 8 +- .../config/YamlConfigurationLoaderTest.java | 4 +- 
.../AutoRepairParameterizedTest.java | 20 +- .../AutoRepairStateFactoryTest.java | 6 +- .../repair/autorepair/AutoRepairTest.java | 8 +- .../autorepair/AutoRepairUtilsTest.java | 2 +- .../autorepair/RepairRangeSplitterTest.java | 239 +++++++++++ .../service/AutoRepairServiceBasicTest.java | 10 +- .../tools/nodetool/AutoRepairStatusTest.java | 4 +- 20 files changed, 725 insertions(+), 94 deletions(-) create mode 100644 src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 078d19d8eaff..e9b495d7d9fd 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -199,13 +199,13 @@ private TableParams build(TableParams.Builder builder) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); if (hasOption(Option.REPAIR_FULL)) - builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, getMap(Option.REPAIR_FULL))); + builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.FULL, getMap(Option.REPAIR_FULL))); if (hasOption(Option.REPAIR_INCREMENTAL)) - builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, getMap(Option.REPAIR_INCREMENTAL))); + builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.INCREMENTAL, getMap(Option.REPAIR_INCREMENTAL))); if (hasOption(Option.REPAIR_PREVIEW_REPAIRED)) - builder.automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.preview_repaired, getMap(Option.REPAIR_PREVIEW_REPAIRED))); + 
builder.automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, getMap(Option.REPAIR_PREVIEW_REPAIRED))); return builder.build(); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index bffff0629e12..4a06d78c6eb6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -58,19 +58,19 @@ public class AutoRepairConfig implements Serializable public enum RepairType implements Serializable { - full, - incremental, - preview_repaired; + FULL, + INCREMENTAL, + PREVIEW_REPAIRED; public static AutoRepairState getAutoRepairState(RepairType repairType) { switch (repairType) { - case full: + case FULL: return new FullRepairState(); - case incremental: + case INCREMENTAL: return new IncrementalRepairState(); - case preview_repaired: + case PREVIEW_REPAIRED: return new PreviewRepairedState(); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index a52e07da84a0..e5e848c9d50b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -304,7 +304,7 @@ class PreviewRepairedState extends AutoRepairState { public PreviewRepairedState() { - super(RepairType.preview_repaired); + super(RepairType.PREVIEW_REPAIRED); } @Override @@ -324,7 +324,7 @@ class IncrementalRepairState extends AutoRepairState { public IncrementalRepairState() { - super(RepairType.incremental); + super(RepairType.INCREMENTAL); } @Override @@ -369,7 +369,7 @@ class FullRepairState extends AutoRepairState { public FullRepairState() { - super(RepairType.full); + super(RepairType.FULL); } @Override diff --git 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 0a328de44f98..79bf638eb7d9 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.UUID; @@ -34,6 +35,7 @@ import com.google.common.collect.Lists; import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Splitter; import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.LocalStrategy; @@ -74,6 +76,7 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.utils.NoSpamLogger; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; @@ -849,29 +852,19 @@ public static void runRepairOnNewlyBootstrappedNodeIfEnabled() } } - public static List> splitEvenly(Range tokenRange, int numberOfSplits) + public static Collection> split(Range tokenRange, int numberOfSplits) { - List> splitRanges = new ArrayList<>(); - long left = (Long) tokenRange.left.getTokenValue(); - long right = (Long) tokenRange.right.getTokenValue(); - long repairTokenWidth = (right - left) / numberOfSplits; - for (int i = 0; i < numberOfSplits; i++) + Collection> ranges; + Optional splitter = DatabaseDescriptor.getPartitioner().splitter(); + if (splitter.isEmpty()) { - long curLeft = left + (i * repairTokenWidth); - long curRight = curLeft + repairTokenWidth; - - if ((i + 1) == numberOfSplits) - { - curRight = right; - } - - Token childStartToken = 
ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curLeft); - Token childEndToken = ClusterMetadata.current().partitioner.getTokenFactory().fromString("" + curRight); - logger.debug("Current Token Left side {}, right side {}", childStartToken - .toString(), childEndToken.toString()); - Range splitRange = new Range<>(childStartToken, childEndToken); - splitRanges.add(splitRange); + NoSpamLogger.log(logger, NoSpamLogger.Level.WARN, 30, TimeUnit.MINUTES, "Partitioner {} does not support splitting, falling back to splitting by token range", DatabaseDescriptor.getPartitioner()); + ranges = Collections.singleton(tokenRange); + } + else + { + ranges = splitter.get().split(Collections.singleton(tokenRange), numberOfSplits); } - return splitRanges; + return ranges; } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index 9a884f61c581..7c118b99ac7d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -29,7 +29,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.service.StorageService; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.splitEvenly; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter { @@ -48,12 +48,11 @@ public List getRepairAssignments(AutoRepairConfig.RepairType r int numberOfSubranges = config.getRepairSubRangeNum(repairType); boolean byKeyspace = config.getRepairByKeyspace(repairType); - // collect all token ranges. 
List> allRanges = new ArrayList<>(); for (Range token : tokens) { - allRanges.addAll(splitEvenly(token, numberOfSubranges)); + allRanges.addAll(split(token, numberOfSubranges)); } if (byKeyspace) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java new file mode 100644 index 000000000000..09a5234b6fb0 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -0,0 +1,396 @@ +package org.apache.cassandra.repair.autorepair; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.clearspring.analytics.stream.cardinality.CardinalityMergeException; +import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; +import com.clearspring.analytics.stream.cardinality.ICardinality; +import org.apache.cassandra.config.DataStorageSpec; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; +import org.apache.cassandra.db.lifecycle.SSTableSet; +import org.apache.cassandra.db.lifecycle.View; +import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.sstable.format.big.BigTableReader; +import org.apache.cassandra.io.sstable.format.big.BigTableScanner; +import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; +import org.apache.cassandra.io.sstable.metadata.MetadataType; 
+import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.concurrent.Refs; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; + +/** + * In Apache Cassandra, tuning the repair ranges has three main goals: + *

+ * 1. **Create smaller, consistent repair times**: Long repairs, such as those lasting 15 hours, can be problematic. + * If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact + * of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration + * becoming the main bottleneck. + *

+ * 2. **Minimize the impact on hosts**: Repairs should not heavily affect the host systems. For incremental repairs, + * this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide + * partitions—can lead to issues with disk usage and higher compaction costs. + *

+ * 3. **Reduce overstreaming**: The Merkle tree, which represents data within each partition and range, has a maximum size. + * If a repair covers too many partitions, the tree’s leaves represent larger data ranges. Even a small change in a leaf + * can trigger excessive data streaming, making the process inefficient. + *

+ * Additionally, if there are many small tables, it's beneficial to batch these tables together under a single parent repair. + * This prevents the repair overhead from becoming a bottleneck, especially when dealing with hundreds of tables. Running + * individual repairs for each table can significantly impact performance and efficiency. + *

+ * To manage these issues, the strategy involves estimating the size and number of partitions within a range and splitting + * it accordingly to bound the size of the range splits. + */ + +public class RepairRangeSplitter implements IAutoRepairTokenRangeSplitter +{ + private static final Logger logger = LoggerFactory.getLogger(RepairRangeSplitter.class); + + static final String SUBRANGE_SIZE = "bytes_per_assignment"; + static final String PARTITION_COUNT = "partitions_per_assignment"; + static final String TABLE_BATCH_LIMIT = "max_tables_per_assignment"; + static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; + + private final int tablesPerAssignmentLimit; + private final long maxBytesPerScheduleBytes; + private final long bytesPerSubrange; + private final long partitionsPerSubrange; + + private static final DataStorageSpec.LongBytesBound DEFAULT_SUBRANGE_SIZE = new DataStorageSpec.LongBytesBound("100GiB"); + private static final long DEFAULT_MAX_BYTES_PER_SCHEDULE = Long.MAX_VALUE; + private static final long DEFAULT_PARTITION_LIMIT = (long) Math.pow(2, DatabaseDescriptor.getRepairSessionMaxTreeDepth()); + private static final int DEFAULT_TABLE_BATCH_LIMIT = 64; + + public RepairRangeSplitter(Map parameters) + { + // Demonstrates parameterizing a range splitter so we can have splitter specific options. 
+ DataStorageSpec.LongBytesBound subrangeSize; + if (parameters.containsKey(SUBRANGE_SIZE)) + { + subrangeSize = new DataStorageSpec.LongBytesBound(parameters.get(SUBRANGE_SIZE)); + } + else + { + subrangeSize = DEFAULT_SUBRANGE_SIZE; + } + bytesPerSubrange = subrangeSize.toBytes(); + + if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) + { + maxBytesPerScheduleBytes = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)).toBytes(); + } + else + { + maxBytesPerScheduleBytes = DEFAULT_MAX_BYTES_PER_SCHEDULE; + } + + if (parameters.containsKey(PARTITION_COUNT)) + { + partitionsPerSubrange = Long.parseLong(parameters.get(PARTITION_COUNT)); + } + else + { + partitionsPerSubrange = DEFAULT_PARTITION_LIMIT; + } + + if (parameters.containsKey(TABLE_BATCH_LIMIT)) + { + tablesPerAssignmentLimit = Integer.parseInt(parameters.get(TABLE_BATCH_LIMIT)); + } + else + { + tablesPerAssignmentLimit = DEFAULT_TABLE_BATCH_LIMIT; + } + + logger.info("Configured {} with {}={}, {}={}, {}={}, {}={}", RepairRangeSplitter.class.getName(), + SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerScheduleBytes, + PARTITION_COUNT, partitionsPerSubrange, TABLE_BATCH_LIMIT, tablesPerAssignmentLimit); + } + + @Override + public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + { + logger.debug("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspaceName, tableNames); + Collection> tokenRanges = getTokenRanges(primaryRangeOnly, keyspaceName); + return getRepairAssignments(repairType, keyspaceName, tableNames, tokenRanges); + } + + @VisibleForTesting + List getRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) + { + List repairAssignments = new ArrayList<>(); + // this is used for batching minimal single assignment tables together + 
List currentAssignments = new ArrayList<>(); + + for (String tableName : tableNames) + { + List tableAssignments = getRepairAssignmentsForTable(repairType, keyspaceName, tableName, tokenRanges); + + if (tableAssignments.isEmpty()) + continue; + + // If the table assignments are for the same token range and we have room to add more tables to the current assignment + if (tableAssignments.size() == 1 && currentAssignments.size() < tablesPerAssignmentLimit && + (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) + { + currentAssignments.addAll(tableAssignments); + } + else + { + if (!currentAssignments.isEmpty()) + { + repairAssignments.add(merge(currentAssignments)); + currentAssignments.clear(); + } + repairAssignments.addAll(tableAssignments); + } + } + if (!currentAssignments.isEmpty()) + repairAssignments.add(merge(currentAssignments)); + return repairAssignments; + } + + @VisibleForTesting + static RepairAssignment merge(List assignments) + { + if (assignments.isEmpty()) + throw new IllegalStateException("Cannot merge empty assignments"); + + Set mergedTableNames = new HashSet<>(); + Range referenceTokenRange = assignments.get(0).getTokenRange(); + String referenceKeyspaceName = assignments.get(0).getKeyspaceName(); + + for (RepairAssignment assignment : assignments) + { + // These checks _should_ be unnecessary but are here to ensure that the assignments are consistent + if (!assignment.getTokenRange().equals(referenceTokenRange)) + throw new IllegalStateException("All assignments must have the same token range"); + if (!assignment.getKeyspaceName().equals(referenceKeyspaceName)) + throw new IllegalStateException("All assignments must have the same keyspace name"); + + mergedTableNames.addAll(assignment.getTableNames()); + } + + return new RepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames)); + } + + public List 
getRepairAssignmentsForTable(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Collection> tokenRanges) + { + List repairAssignments = new ArrayList<>(); + + long targetBytesSoFar = 0; + + List sizeEstimates = getRangeSizeEstimate(repairType, keyspaceName, tableName, tokenRanges); + // since its possible for us to hit maxBytesPerScheduleBytes before seeing all ranges, shuffle so there is chance + // at least of hitting all the ranges _eventually_ for the worst case scenarios + Collections.shuffle(sizeEstimates); + for (SizeEstimate estimate : sizeEstimates) + { + if (estimate.sizeInRange == 0) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); + if (memtableSize > 0L) + { + logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", keyspaceName, tableName, estimate.tokenRange, memtableSize); + RepairAssignment assignment = new RepairAssignment(estimate.tokenRange, keyspaceName, Collections.singletonList(tableName)); + repairAssignments.add(assignment); + } + else + { + logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", keyspaceName, tableName, estimate.tokenRange); + } + } + else if (targetBytesSoFar + estimate.sizeInRange < maxBytesPerScheduleBytes) + { + targetBytesSoFar += estimate.sizeInRange; + // Check if the estimate needs splitting based on the criteria + boolean needsSplitting = estimate.sizeInRange > bytesPerSubrange || estimate.partitions > partitionsPerSubrange; + + if (needsSplitting) + { + // Calculate the number of splits needed for size and partitions + int splitsForSize = (int) Math.ceil((double) estimate.sizeInRange / bytesPerSubrange); + int splitsForPartitions = (int) Math.ceil((double) estimate.partitions / partitionsPerSubrange); + + // Choose the larger of the two as the number 
of splits + int numberOfSplits = Math.max(splitsForSize, splitsForPartitions); + + // Split the token range into subranges + Collection> subranges = split(estimate.tokenRange, numberOfSplits); + for (Range subrange : subranges) + { + RepairAssignment assignment = new RepairAssignment(subrange, keyspaceName, Collections.singletonList(tableName)); + repairAssignments.add(assignment); + } + } + else + { + // No splitting needed, repair the entire range as-is + RepairAssignment assignment = new RepairAssignment(estimate.tokenRange, keyspaceName, Collections.singletonList(tableName)); + repairAssignments.add(assignment); + } + } + else + { + // Really this is "Ok" but it does mean we are relying on randomness to cover the other ranges + logger.info("Skipping range {} for {}.{} as it would exceed maxBytesPerScheduleBytes, consider increasing maxBytesPerScheduleBytes, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds", estimate.tokenRange, keyspaceName, tableName); + } + } + return repairAssignments; + } + + public Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) + { + // Collect all applicable token ranges + Collection> wrappedRanges; + if (primaryRangeOnly) + { + wrappedRanges = StorageService.instance.getPrimaryRanges(keyspaceName); + } + else + { + wrappedRanges = StorageService.instance.getLocalRanges(keyspaceName); + } + + // Unwrap each range as we need to account for ranges that overlap the ring + List> ranges = new ArrayList<>(); + for (Range wrappedRange : wrappedRanges) + { + ranges.addAll(wrappedRange.unwrap()); + } + return ranges; + } + + private List getRangeSizeEstimate(AutoRepairConfig.RepairType repairType, String keyspace, String table, Collection> tokenRanges) + { + List sizeEstimates = new ArrayList<>(); + for (Range tokenRange : tokenRanges) + { + logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); + try (Refs refs = 
getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) + { + SizeEstimate estimate = getSizesForRangeOfSSTables(keyspace, table, tokenRange, refs); + logger.debug("Size estimate for {}.{} for range {} is {}", keyspace, table, tokenRange, estimate); + sizeEstimates.add(estimate); + } + } + return sizeEstimates; + } + + private static SizeEstimate getSizesForRangeOfSSTables(String keyspace, String table, Range tokenRange, Refs refs) + { + ICardinality cardinality = new HyperLogLogPlus(13, 25); + long approxBytesInRange = 0L; + long totalBytes = 0L; + + for (SSTableReader reader : refs) + { + try + { + if (reader.openReason == SSTableReader.OpenReason.EARLY) + continue; + CompactionMetadata metadata = (CompactionMetadata) reader.descriptor.getMetadataSerializer().deserialize(reader.descriptor, MetadataType.COMPACTION); + if (metadata != null) + cardinality = cardinality.merge(metadata.cardinalityEstimator); + + long sstableSize = reader.bytesOnDisk(); + totalBytes += sstableSize; + // TODO since reading the index file anyway may be able to get more accurate ratio for partition count, + // still better to use the cardinality estimator then the index since it wont count duplicates. + // get the bounds of the sstable for this range using the index file but do not actually read it. 
+ List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); + try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, Collections.singleton(tokenRange))) + { + assert bounds.size() == 1; + + AbstractBounds bound = bounds.get(0); + long startPosition = scanner.getDataPosition(bound.left); + long endPosition = scanner.getDataPosition(bound.right); + // If end position is 0 we can assume the sstable ended before that token, bound at size of file + if (endPosition == 0) + { + endPosition = sstableSize; + } + + long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); + approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); + } + } + catch (IOException | CardinalityMergeException e) + { + logger.error("Error calculating size estimate for {}.{} for range {} on {}", keyspace, table, tokenRange, reader, e); + } + } + double ratio = approxBytesInRange / (double) totalBytes; + // use the ratio from size to estimate the partitions in the range as well + long partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); + return new SizeEstimate(keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); + } + + private static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Range tokenRange) + { + final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + + if (cfs == null) + { + throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, tableName)); + } + + Refs refs = null; + + while (refs == null) + { + Iterable sstables = cfs.getTracker().getView().select(SSTableSet.CANONICAL); + SSTableIntervalTree tree = SSTableIntervalTree.build(sstables); + Range r = Range.makeRowRange(tokenRange); + List canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); + if (repairType == 
AutoRepairConfig.RepairType.INCREMENTAL) + { + canonicalSSTables = canonicalSSTables.stream().filter(SSTableReader::isRepaired).collect(Collectors.toList()); + } + refs = Refs.tryRef(canonicalSSTables); + } + return refs; + } + + @VisibleForTesting + protected static class SizeEstimate + { + public final String keyspace; + public final String table; + public final Range tokenRange; + public final long partitions; + public final long sizeInRange; + public final long totalSize; + + public SizeEstimate(String keyspace, String table, Range tokenRange, long partitions, long sizeInRange, long totalSize) + { + this.keyspace = keyspace; + this.table = table; + this.tokenRange = tokenRange; + this.partitions = partitions; + this.sizeInRange = sizeInRange; + this.totalSize = totalSize; + } + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java index 953fa57faf99..91d5783abe8e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java @@ -44,7 +44,7 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.concurrent.Refs; -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.splitEvenly; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; public class UnrepairedBytesBasedTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { @@ -99,9 +99,9 @@ public List getRepairAssignments(AutoRepairConfig.RepairType r List repairAssignments = new ArrayList<>(); logger.info("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspaceName, tableNames); - if (repairType != AutoRepairConfig.RepairType.incremental) + if (repairType != 
AutoRepairConfig.RepairType.INCREMENTAL) { - throw new IllegalArgumentException(this.getClass().getName() + " only supports " + AutoRepairConfig.RepairType.incremental + " repair"); + throw new IllegalArgumentException(this.getClass().getName() + " only supports " + AutoRepairConfig.RepairType.INCREMENTAL + " repair"); } // TODO: create a custom repair assignment that indicates number of bytes in repair and join tables by byte size. @@ -199,7 +199,7 @@ public List getRepairAssignmentsForTable(String keyspaceName, // TODO: approximation per range, this is a bit lossy since targetRanges rounds down. long approximateBytesPerSplit = approximateUnrepairedBytesForRange / targetRanges; logger.info("Splitting {}.{} for range {} into {} sub ranges, approximateBytesPerSplit={}", keyspaceName, tableName, tokenRange, targetRanges, FileUtils.stringifyFileSize(approximateBytesPerSplit)); - List> splitRanges = splitEvenly(tokenRange, (int) targetRanges); + Collection> splitRanges = split(tokenRange, (int) targetRanges); int splitRangeCount = 0; for (Range splitRange : splitRanges) { diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index aa6082ae7a51..7a27f0293f39 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -45,9 +45,9 @@ public String toString() } public static final Map> DEFAULT_OPTIONS = - ImmutableMap.of(AutoRepairConfig.RepairType.full, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - AutoRepairConfig.RepairType.incremental, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - AutoRepairConfig.RepairType.preview_repaired, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); + ImmutableMap.of(AutoRepairConfig.RepairType.FULL, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), + AutoRepairConfig.RepairType.INCREMENTAL, 
ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), + AutoRepairConfig.RepairType.PREVIEW_REPAIRED, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); public final AutoRepairConfig.RepairType type; diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index d80eef46cb59..ee219188261b 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -600,9 +600,9 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) .add("extensions", params.extensions) - .add("repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) - .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()) - .add("repair_preview_repaired", params.automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired).asMap()); + .add("repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).asMap()) + .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).asMap()) + .add("repair_preview_repaired", params.automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED).asMap()); // Only add CDC-enabled flag to schema if it's enabled on the node. 
This is to work around RTE's post-8099 if a 3.8+ @@ -1093,9 +1093,9 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.full, row.getFrozenTextMap("repair_full"))) - .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.incremental, row.getFrozenTextMap("repair_incremental"))) - .automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.preview_repaired, row.getFrozenTextMap("repair_preview_repaired"))); + .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.FULL, row.getFrozenTextMap("repair_full"))) + .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.INCREMENTAL, row.getFrozenTextMap("repair_incremental"))) + .automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, row.getFrozenTextMap("repair_preview_repaired"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 629d84532097..d769f88a4452 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -172,9 +172,9 @@ private TableParams(Builder builder) automatedRepair = new EnumMap(AutoRepairConfig.RepairType.class) { { - put(AutoRepairConfig.RepairType.full, builder.automatedRepairFull); - put(AutoRepairConfig.RepairType.incremental, builder.automatedRepairIncremental); - put(AutoRepairConfig.RepairType.preview_repaired, builder.automatedRepairPreviewRepaired); + put(AutoRepairConfig.RepairType.FULL, builder.automatedRepairFull); + put(AutoRepairConfig.RepairType.INCREMENTAL, builder.automatedRepairIncremental); + 
put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, builder.automatedRepairPreviewRepaired); } }; } @@ -209,9 +209,9 @@ public static Builder builder(TableParams params) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) .pendingDrop(params.pendingDrop) - .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.full)) - .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.incremental)) - .automatedRepairPreviewRepaired(params.automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired)) + .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.FULL)) + .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL)) + .automatedRepairPreviewRepaired(params.automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED)) ; } @@ -376,9 +376,9 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) - .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.full)) - .add(Option.REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.incremental)) - .add(Option.REPAIR_PREVIEW_REPAIRED.toString(), automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired)) + .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.FULL)) + .add(Option.REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL)) + .add(Option.REPAIR_PREVIEW_REPAIRED.toString(), automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED)) .toString(); } @@ -441,11 +441,11 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) .newLine() - .append("AND repair_full = 
").append(automatedRepair.get(AutoRepairConfig.RepairType.full).asMap()) + .append("AND repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.FULL).asMap()) .newLine() - .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.incremental).asMap()) + .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).asMap()) .newLine() - .append("AND repair_preview_repaired = ").append(automatedRepair.get(AutoRepairConfig.RepairType.preview_repaired).asMap()); + .append("AND repair_preview_repaired = ").append(automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED).asMap()); } public static final class Builder @@ -474,9 +474,9 @@ public static final class Builder public TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; public boolean pendingDrop = false; - private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.full); - private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.incremental); - private AutoRepairParams automatedRepairPreviewRepaired = new AutoRepairParams(AutoRepairConfig.RepairType.preview_repaired); + private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.FULL); + private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.INCREMENTAL); + private AutoRepairParams automatedRepairPreviewRepaired = new AutoRepairParams(AutoRepairConfig.RepairType.PREVIEW_REPAIRED); public Builder() { diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 7a8342c2a7a4..d8fd68253596 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -61,7 +61,7 @@ public void checkCanRun(RepairType 
repairType) if (!config.isAutoRepairSchedulingEnabled()) throw new ConfigurationException("Auto-repair scheduller is disabled."); - if (repairType != RepairType.incremental) + if (repairType != RepairType.INCREMENTAL) return; if (DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 9b583f006507..3c180dbdd613 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -60,14 +60,14 @@ public static void init() throws IOException .set("auto_repair", ImmutableMap.of( "repair_type_overrides", - ImmutableMap.of(AutoRepairConfig.RepairType.full.toString(), + ImmutableMap.of(AutoRepairConfig.RepairType.FULL.toString(), ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", "parallel_repair_count", "1", "parallel_repair_percentage", "0", "min_repair_interval", "1s"), - AutoRepairConfig.RepairType.incremental.toString(), + AutoRepairConfig.RepairType.INCREMENTAL.toString(), ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", @@ -105,8 +105,8 @@ public void testScheduler() throws ParseException // wait for a couple of minutes for repair to go through on all three nodes Uninterruptibles.sleepUninterruptibly(2, TimeUnit.MINUTES); - validate(AutoRepairConfig.RepairType.full.toString()); - validate(AutoRepairConfig.RepairType.incremental.toString()); + validate(AutoRepairConfig.RepairType.FULL.toString()); + validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); } private void validate(String repairType) throws ParseException diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 
9a0203c301bd..43bb8602485f 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -303,8 +303,8 @@ public void fromMapTest() assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(true, config.auto_repair.enabled); - assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.incremental)); - config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.incremental, false); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.INCREMENTAL)); + config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 8e80af454e2b..f4bfd04dda6f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -520,10 +520,10 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws @Test public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() { - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); - 
Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.incremental).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); + Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); + Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); @@ -564,9 +564,13 @@ public void testDefaultAutomatedRepair() for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), - cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); + Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), + cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); + Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), + cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), - cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.full).repairEnabled()); + 
cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); } } @@ -679,7 +683,7 @@ public void testRepairThrowsForIRWithMVReplay() AutoRepair.instance.setup(); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - if (repairType == AutoRepairConfig.RepairType.incremental) + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) { try { @@ -703,7 +707,7 @@ public void testRepairThrowsForIRWithCDCReplay() AutoRepair.instance.setup(); DatabaseDescriptor.setCDCOnRepairEnabled(true); - if (repairType == AutoRepairConfig.RepairType.incremental) + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) { try { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index 3acdd313e52d..a45583b9bf87 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -30,15 +30,15 @@ public class AutoRepairStateFactoryTest { @Test public void testGetRepairState() { - AutoRepairState state = RepairType.getAutoRepairState(RepairType.full); + AutoRepairState state = RepairType.getAutoRepairState(RepairType.FULL); assertTrue(state instanceof FullRepairState); - state = RepairType.getAutoRepairState(RepairType.incremental); + state = RepairType.getAutoRepairState(RepairType.INCREMENTAL); assertTrue(state instanceof IncrementalRepairState); - state = RepairType.getAutoRepairState(RepairType.preview_repaired); + state = RepairType.getAutoRepairState(RepairType.PREVIEW_REPAIRED); assertTrue(state instanceof PreviewRepairedState); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index df9f105b615f..425dc9b31f37 100644 --- 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -57,8 +57,8 @@ public void setup() AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.full, true); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.FULL, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); AutoRepairService.setup(); } @@ -101,7 +101,7 @@ public void testSafeGuardSetupCall() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithCDCReplay() { - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); @@ -113,7 +113,7 @@ public void testSetupFailsWhenIREnabledWithCDCReplay() @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithMVReplay() { - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.incremental, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); AutoRepair instance = new AutoRepair(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 3629c03041ab..9da8d1d87bb2 100644 
--- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -68,7 +68,7 @@ public class AutoRepairUtilsTest extends CQLTester { - static RepairType repairType = RepairType.incremental; + static RepairType repairType = RepairType.INCREMENTAL; static UUID hostId; static InetAddressAndPort localEndpoint; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java new file mode 100644 index 000000000000..f1367aad10f1 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; + +import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; +import static org.junit.Assert.assertEquals; + +public class RepairRangeSplitterTest extends CQLTester +{ + private RepairRangeSplitter repairRangeSplitter; + private String tableName; + private static Range fullRange; + + @BeforeClass + public static void setUpClass() + { + CQLTester.setUpClass(); + fullRange = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); + } + + @Before + public void setUp() { + repairRangeSplitter = new RepairRangeSplitter(Collections.emptyMap()); + tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + } + + @Test + public void testGetRepairAssignmentsForTable_NoSSTables() { + Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + assertEquals(0, assignments.size()); + } + + @Test + public void testGetRepairAssignmentsForTable_Single() throws Throwable { + Collection> ranges = 
Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); + insertAndFlushSingleTable(tableName); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + assertEquals(1, assignments.size()); + } + + @Test + public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { + repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); + Collection> ranges = Collections.singleton(fullRange); + + List tableNames = createAndInsertTables(3); + List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + + // We expect two assignments, one with table1 and table2 batched, and one with table3 + assertEquals(2, assignments.size()); + assertEquals(2, assignments.get(0).getTableNames().size()); + assertEquals(1, assignments.get(1).getTableNames().size()); + } + + @Test + public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { + repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); + Collection> ranges = Collections.singleton(fullRange); + + List tableNames = createAndInsertTables(2); + List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + + // We expect one assignment, with two tables batched + assertEquals(1, assignments.size()); + assertEquals(2, assignments.get(0).getTableNames().size()); + } + + @Test + public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { + repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "1")); + Collection> ranges = Collections.singleton(fullRange); + + List tableNames = createAndInsertTables(3); + List assignments = 
repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + + assertEquals(3, assignments.size()); + } + + @Test + public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { + repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "100")); + Collection> ranges = Collections.singleton(fullRange); + + List tableNames = createAndInsertTables(5); + List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + + assertEquals(1, assignments.size()); + } + + + + @Test(expected = IllegalStateException.class) + public void testMergeEmptyAssignments() { + // Test when the list of assignments is empty + List emptyAssignments = Collections.emptyList(); + RepairRangeSplitter.merge(emptyAssignments); + } + + @Test + public void testMergeSingleAssignment() { + // Test when there is only one assignment in the list + String keyspaceName = "testKeyspace"; + List tableNames = Arrays.asList("table1", "table2"); + + RepairAssignment assignment = new RepairAssignment(fullRange, keyspaceName, tableNames); + List assignments = Collections.singletonList(assignment); + + RepairAssignment result = RepairRangeSplitter.merge(assignments); + + assertEquals(fullRange, result.getTokenRange()); + assertEquals(keyspaceName, result.getKeyspaceName()); + assertEquals(new HashSet<>(tableNames), new HashSet<>(result.getTableNames())); + } + + @Test + public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { + // Test merging multiple assignments with the same token range and keyspace + String keyspaceName = "testKeyspace"; + List tableNames1 = Arrays.asList("table1", "table2"); + List tableNames2 = Arrays.asList("table2", "table3"); + + RepairAssignment assignment1 = new RepairAssignment(fullRange, keyspaceName, tableNames1); + RepairAssignment assignment2 = new RepairAssignment(fullRange, keyspaceName, tableNames2); + List 
assignments = Arrays.asList(assignment1, assignment2); + + RepairAssignment result = RepairRangeSplitter.merge(assignments); + + assertEquals(fullRange, result.getTokenRange()); + assertEquals(keyspaceName, result.getKeyspaceName()); + assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); + } + + @Test(expected = IllegalStateException.class) + public void testMergeDifferentTokenRange() { + // Test merging assignments with different token ranges + Iterator> range = AutoRepairUtils.split(fullRange, 2).iterator(); // Split the full range into two ranges ie (0-100, 100-200 + Range tokenRange1 = range.next(); + Range tokenRange2 = range.next(); + Assert.assertFalse(range.hasNext()); + + String keyspaceName = "testKeyspace"; + List tableNames = Arrays.asList("table1", "table2"); + + RepairAssignment assignment1 = new RepairAssignment(tokenRange1, keyspaceName, tableNames); + RepairAssignment assignment2 = new RepairAssignment(tokenRange2, keyspaceName, tableNames); + List assignments = Arrays.asList(assignment1, assignment2); + + RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException + } + + @Test(expected = IllegalStateException.class) + public void testMergeDifferentKeyspaceName() { + // Test merging assignments with different keyspace names + List tableNames = Arrays.asList("table1", "table2"); + + RepairAssignment assignment1 = new RepairAssignment(fullRange, "keyspace1", tableNames); + RepairAssignment assignment2 = new RepairAssignment(fullRange, "keyspace2", tableNames); + List assignments = Arrays.asList(assignment1, assignment2); + + RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException + } + + @Test + public void testMergeWithDuplicateTables() { + // Test merging assignments with duplicate table names + String keyspaceName = "testKeyspace"; + List tableNames1 = Arrays.asList("table1", "table2"); + List tableNames2 = Arrays.asList("table2", "table3"); + + 
RepairAssignment assignment1 = new RepairAssignment(fullRange, keyspaceName, tableNames1); + RepairAssignment assignment2 = new RepairAssignment(fullRange, keyspaceName, tableNames2); + List assignments = Arrays.asList(assignment1, assignment2); + + RepairAssignment result = RepairRangeSplitter.merge(assignments); + + // The merged result should contain all unique table names + assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); + } + + private void insertAndFlushSingleTable(String tableName) throws Throwable { + execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); + flush(); + } + + private List createAndInsertTables(int count) throws Throwable { + List tableNames = new ArrayList<>(); + for (int i = 0; i < count; i++) { + String tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + tableNames.add(tableName); + insertAndFlushTable(tableName); + } + return tableNames; + } + + private void insertAndFlushTable(String tableName) throws Throwable { + executeFormattedQuery("INSERT INTO " + KEYSPACE + '.' 
+ tableName + " (k, v) values (?, ?)", 1, 1); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); + cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(1, cfs.getLiveSSTables().size()); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 054f136dad75..f8911acbd3ee 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -84,14 +84,14 @@ public void testsetAutoRepairRetryBackoffInSec() { public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { autoRepairService.config = new AutoRepairConfig(false); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } @Test(expected = ConfigurationException.class) public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } @Test @@ -99,20 +99,20 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } @Test(expected = ConfigurationException.class) public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { autoRepairService.config = new 
AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } @Test public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index 16b12ee2350e..f1faabc0fc03 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -70,8 +70,8 @@ public void setUp() throws Exception DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.loadConfig(); setAutoRepairEnabled(true); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.full, true); - DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.incremental, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.FULL, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); } From 807d6edaa754b11545f458ce08e4298d6fca46e8 Mon Sep 17 00:00:00 2001 From: Chris Lohfink Date: Thu, 7 Nov 2024 21:01:45 -0600 Subject: [PATCH 070/257] add assignment priority to table options --- .../IAutoRepairTokenRangeSplitter.java | 33 +++++++ .../autorepair/RepairRangeSplitter.java | 13 +++ .../cassandra/schema/AutoRepairParams.java | 28
++++-- .../autorepair/RepairRangeSplitterTest.java | 91 +++++++++++++++++++ 4 files changed, 159 insertions(+), 6 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index 169600eca405..a4760bc3f94f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -18,11 +18,17 @@ package org.apache.cassandra.repair.autorepair; +import java.util.Comparator; import java.util.List; +import java.util.Map; import java.util.Objects; +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.AutoRepairParams; public interface IAutoRepairTokenRangeSplitter { @@ -38,6 +44,21 @@ public interface IAutoRepairTokenRangeSplitter */ List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); + /** + * Reorders the list of {@link RepairAssignment} objects based on their priority for a given repair type. + * The list is sorted in descending order, so higher priority assignments appear first. + * If two assignments have the same priority for the specified repair type, their original order is preserved. + * + * @param repairAssignments A list of {@link RepairAssignment} objects to be reordered. + * @param repairType The {@link AutoRepairConfig.RepairType} used to determine the priority of each assignment. + * The priority is determined using the {@link RepairAssignment#getPriority(AutoRepairConfig.RepairType)} method. 
+ */ + @VisibleForTesting + default void reorderByPriority(List repairAssignments, AutoRepairConfig.RepairType repairType) + { + repairAssignments.sort(Comparator.comparingInt(a -> ((RepairAssignment) a).getPriority(repairType)).reversed()); + } + /** * Defines a repair assignment to be issued by the autorepair framework. */ @@ -71,6 +92,18 @@ public List getTableNames() return tableNames; } + public int getPriority(AutoRepairConfig.RepairType type) + { + int max = 0; + for (String table : tableNames) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, table); + if (cfs != null) + max = Math.max(max, cfs.metadata().params.automatedRepair.get(type).priority()); + } + return max; + } + @Override public boolean equals(Object o) { diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 09a5234b6fb0..a13fa9748bb5 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -141,6 +142,16 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy // this is used for batching minimal single assignment tables together List currentAssignments = new ArrayList<>(); + // sort the tables by size so can batch the smallest ones together + tableNames.sort((t1, t2) -> { + ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); + ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); + if (cfs1 == null || cfs2 == null) + { + throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, t1)); + } + return 
Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); + }); for (String tableName : tableNames) { List tableAssignments = getRepairAssignmentsForTable(repairType, keyspaceName, tableName, tokenRanges); @@ -166,6 +177,8 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy } if (!currentAssignments.isEmpty()) repairAssignments.add(merge(currentAssignments)); + + reorderByPriority(repairAssignments, repairType); return repairAssignments; } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index 7a27f0293f39..feadbe4e227e 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -17,6 +17,8 @@ */ package org.apache.cassandra.schema; +import java.util.Arrays; +import java.util.EnumMap; import java.util.HashMap; import java.util.Map; import java.util.Objects; @@ -35,7 +37,8 @@ public final class AutoRepairParams { public enum Option { - ENABLED; + ENABLED, + PRIORITY; @Override public String toString() @@ -44,10 +47,15 @@ public String toString() } } + public static final Map DEFAULT_SUB_OPTIONS = ImmutableMap.of( + Option.ENABLED.toString(), Boolean.toString(true), + Option.PRIORITY.toString(), "0" + ); + public static final Map> DEFAULT_OPTIONS = - ImmutableMap.of(AutoRepairConfig.RepairType.FULL, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - AutoRepairConfig.RepairType.INCREMENTAL, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true)), - AutoRepairConfig.RepairType.PREVIEW_REPAIRED, ImmutableMap.of(Option.ENABLED.toString(), Boolean.toString(true))); + ImmutableMap.of(AutoRepairConfig.RepairType.FULL, DEFAULT_SUB_OPTIONS, + AutoRepairConfig.RepairType.INCREMENTAL, DEFAULT_SUB_OPTIONS, + AutoRepairConfig.RepairType.PREVIEW_REPAIRED, DEFAULT_SUB_OPTIONS); public final AutoRepairConfig.RepairType type; @@ -60,7 
+68,7 @@ public String toString() public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Map options) { - Map> optionsMap = new HashMap<>(); + Map> optionsMap = new EnumMap<>(AutoRepairConfig.RepairType.class); for (Map.Entry> entry : DEFAULT_OPTIONS.entrySet()) { optionsMap.put(entry.getKey(), new HashMap<>(entry.getValue())); @@ -69,7 +77,7 @@ public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Ma { for (Map.Entry entry : options.entrySet()) { - if (!Option.ENABLED.toString().equals(toLowerCaseLocalized(entry.getKey()))) + if (Arrays.stream(Option.values()).noneMatch(option -> option.toString().equalsIgnoreCase(entry.getKey()))) { throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); } @@ -89,6 +97,14 @@ public boolean repairEnabled() : Boolean.parseBoolean(enabled); } + public int priority() + { + String priority = options.get(type).get(Option.PRIORITY.toString()); + return priority == null + ? Integer.parseInt(DEFAULT_OPTIONS.get(type).get(Option.PRIORITY.toString())) + : Integer.parseInt(priority); + } + public void validate() { String enabled = options.get(type).get(Option.ENABLED.toString()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index f1367aad10f1..3413c5171a74 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -41,6 +41,7 @@ import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class RepairRangeSplitterTest extends CQLTester { @@ -61,6 +62,95 @@ public void setUp() { tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); } + @Test + public void 
testReorderByPriorityWithDifferentPriorities() { + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + + // Test reordering assignments with different priorities + RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); + RepairAssignment assignment2 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table2)); + RepairAssignment assignment3 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table3)); + + // Assume these priorities based on the repair type + List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3)); + + repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); + + // Verify the order is by descending priority + assertEquals(assignment2, assignments.get(0)); + assertEquals(assignment1, assignments.get(1)); + assertEquals(assignment3, assignments.get(2)); + } + + @Test + public void testReorderByPriorityWithSamePriority() { + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + + // Test reordering assignments with the same priority + RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); + RepairAssignment assignment2 = new RepairAssignment(fullRange, KEYSPACE, 
Collections.singletonList(table2)); + RepairAssignment assignment3 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table3)); + + List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3)); + + repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); + + // Verify the original order is preserved as all priorities are the same + assertEquals(assignment1, assignments.get(0)); + assertEquals(assignment2, assignments.get(1)); + assertEquals(assignment3, assignments.get(2)); + } + + @Test + public void testReorderByPriorityWithMixedPriorities() { + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table4 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + + // Test reordering assignments with mixed priorities + RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); + RepairAssignment assignment2 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table2)); + RepairAssignment assignment3 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table3)); + RepairAssignment assignment4 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table4)); + + List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3, assignment4)); + + repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); + + // Verify the order: highest priority first, then preserved order for same priority + assertEquals(assignment2, assignments.get(0)); // 
Priority 3 + assertEquals(assignment1, assignments.get(1)); // Priority 2 + assertEquals(assignment3, assignments.get(2)); // Priority 2 + assertEquals(assignment4, assignments.get(3)); // Priority 1 + } + + @Test + public void testReorderByPriorityWithEmptyList() { + // Test with an empty list (should remain empty) + List assignments = new ArrayList<>(); + repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); + assertTrue(assignments.isEmpty()); + } + + @Test + public void testReorderByPriorityWithOneElement() { + // Test with a single element (should remain unchanged) + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '5'}"); + + RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); + + List assignments = new ArrayList<>(Collections.singletonList(assignment1)); + + repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); + + assertEquals(assignment1, assignments.get(0)); // Single element should remain in place + } + @Test public void testGetRepairAssignmentsForTable_NoSSTables() { Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); @@ -215,6 +305,7 @@ public void testMergeWithDuplicateTables() { assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); } + private void insertAndFlushSingleTable(String tableName) throws Throwable { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); flush(); From 2b70565d71ed871c876d9911c62fb39f00d124cf Mon Sep 17 00:00:00 2001 From: Chris Lohfink Date: Thu, 7 Nov 2024 21:03:05 -0600 Subject: [PATCH 071/257] reorder with DefaultAutoRepairTokenSplitter --- .../repair/autorepair/DefaultAutoRepairTokenSplitter.java | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index 7c118b99ac7d..c0ddc6b9aa7e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -74,6 +74,7 @@ public List getRepairAssignments(AutoRepairConfig.RepairType r } } } + reorderByPriority(repairAssignments, repairType); return repairAssignments; } } \ No newline at end of file From c53d550ac062810c3b4eae9f653c64cc8097df19 Mon Sep 17 00:00:00 2001 From: Chris Lohfink Date: Thu, 7 Nov 2024 22:48:30 -0600 Subject: [PATCH 072/257] add tests for partition count estimation --- .../autorepair/RepairRangeSplitter.java | 8 +- .../autorepair/RepairRangeSplitterTest.java | 98 +++++++++++++------ 2 files changed, 71 insertions(+), 35 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index a13fa9748bb5..ffb104a53686 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -147,9 +147,7 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); if (cfs1 == null || cfs2 == null) - { throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, t1)); - } return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); }); for (String tableName : tableNames) @@ -310,7 +308,8 @@ private List getRangeSizeEstimate(AutoRepairConfig.RepairType repa return sizeEstimates; } - private static SizeEstimate 
getSizesForRangeOfSSTables(String keyspace, String table, Range tokenRange, Refs refs) + @VisibleForTesting + static SizeEstimate getSizesForRangeOfSSTables(String keyspace, String table, Range tokenRange, Refs refs) { ICardinality cardinality = new HyperLogLogPlus(13, 25); long approxBytesInRange = 0L; @@ -360,7 +359,8 @@ private static SizeEstimate getSizesForRangeOfSSTables(String keyspace, String t return new SizeEstimate(keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); } - private static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Range tokenRange) + @VisibleForTesting + static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Range tokenRange) { final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 3413c5171a74..aa81e75300cf 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -37,7 +37,9 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.utils.concurrent.Refs; import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; import static org.junit.Assert.assertEquals; @@ -47,13 +49,13 @@ public class RepairRangeSplitterTest extends CQLTester { private RepairRangeSplitter repairRangeSplitter; private String tableName; - private static Range fullRange; + private static Range FULL_RANGE; @BeforeClass public 
static void setUpClass() { CQLTester.setUpClass(); - fullRange = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); + FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); } @Before @@ -62,6 +64,36 @@ public void setUp() { tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); } + @Test + public void testSizePartitionCount() throws Throwable + { + insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + Refs sstables = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); + assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); + RepairRangeSplitter.SizeEstimate sizes = RepairRangeSplitter.getSizesForRangeOfSSTables(KEYSPACE, tableName, FULL_RANGE, sstables); + assertEquals(10, sizes.partitions); + } + + @Test + public void testSizePartitionCountSplit() throws Throwable + { + int[] values = new int[10000]; + for (int i = 0; i < values.length; i++) + values[i] = i + 1; + insertAndFlushTable(tableName, values); + Iterator> range = AutoRepairUtils.split(FULL_RANGE, 2).iterator(); + Range tokenRange1 = range.next(); + Range tokenRange2 = range.next(); + Assert.assertFalse(range.hasNext()); + + Refs sstables1 = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange2); + RepairRangeSplitter.SizeEstimate sizes1 = RepairRangeSplitter.getSizesForRangeOfSSTables(KEYSPACE, tableName, tokenRange1, sstables1); + RepairRangeSplitter.SizeEstimate sizes2 = RepairRangeSplitter.getSizesForRangeOfSSTables(KEYSPACE, tableName, tokenRange2, sstables2); + // +-5% because HLL merge and the applying of range size approx ratio causes estimation errors + 
assertTrue(Math.abs(10000 - (sizes1.partitions + sizes2.partitions)) <= 60); + } + @Test public void testReorderByPriorityWithDifferentPriorities() { String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); @@ -69,9 +101,9 @@ public void testReorderByPriorityWithDifferentPriorities() { String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); // Test reordering assignments with different priorities - RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); - RepairAssignment assignment2 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table2)); - RepairAssignment assignment3 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table3)); + RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); + RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table2)); + RepairAssignment assignment3 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table3)); // Assume these priorities based on the repair type List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3)); @@ -91,9 +123,9 @@ public void testReorderByPriorityWithSamePriority() { String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); // Test reordering assignments with the same priority - RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); - RepairAssignment assignment2 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table2)); - RepairAssignment assignment3 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table3)); + RepairAssignment assignment1 = new 
RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); + RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table2)); + RepairAssignment assignment3 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table3)); List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3)); @@ -113,10 +145,10 @@ public void testReorderByPriorityWithMixedPriorities() { String table4 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); // Test reordering assignments with mixed priorities - RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table1)); - RepairAssignment assignment2 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table2)); - RepairAssignment assignment3 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table3)); - RepairAssignment assignment4 = new RepairAssignment(fullRange, KEYSPACE, Collections.singletonList(table4)); + RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); + RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table2)); + RepairAssignment assignment3 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table3)); + RepairAssignment assignment4 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table4)); List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3, assignment4)); @@ -142,7 +174,7 @@ public void testReorderByPriorityWithOneElement() { // Test with a single element (should remain unchanged) String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '5'}"); - RepairAssignment assignment1 = new RepairAssignment(fullRange, KEYSPACE, 
Collections.singletonList(table1)); + RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); List assignments = new ArrayList<>(Collections.singletonList(assignment1)); @@ -169,7 +201,7 @@ public void testGetRepairAssignmentsForTable_Single() throws Throwable { @Test public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); - Collection> ranges = Collections.singleton(fullRange); + Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); @@ -183,7 +215,7 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { @Test public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); - Collection> ranges = Collections.singleton(fullRange); + Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); @@ -196,7 +228,7 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { @Test public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "1")); - Collection> ranges = Collections.singleton(fullRange); + Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); @@ -207,7 +239,7 @@ public void testGetRepairAssignmentsForTable_NoBatching() throws 
Throwable { @Test public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "100")); - Collection> ranges = Collections.singleton(fullRange); + Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); @@ -215,8 +247,6 @@ public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { assertEquals(1, assignments.size()); } - - @Test(expected = IllegalStateException.class) public void testMergeEmptyAssignments() { // Test when the list of assignments is empty @@ -230,12 +260,12 @@ public void testMergeSingleAssignment() { String keyspaceName = "testKeyspace"; List tableNames = Arrays.asList("table1", "table2"); - RepairAssignment assignment = new RepairAssignment(fullRange, keyspaceName, tableNames); + RepairAssignment assignment = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames); List assignments = Collections.singletonList(assignment); RepairAssignment result = RepairRangeSplitter.merge(assignments); - assertEquals(fullRange, result.getTokenRange()); + assertEquals(FULL_RANGE, result.getTokenRange()); assertEquals(keyspaceName, result.getKeyspaceName()); assertEquals(new HashSet<>(tableNames), new HashSet<>(result.getTableNames())); } @@ -247,13 +277,13 @@ public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { List tableNames1 = Arrays.asList("table1", "table2"); List tableNames2 = Arrays.asList("table2", "table3"); - RepairAssignment assignment1 = new RepairAssignment(fullRange, keyspaceName, tableNames1); - RepairAssignment assignment2 = new RepairAssignment(fullRange, keyspaceName, tableNames2); + RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames1); + RepairAssignment assignment2 = new 
RepairAssignment(FULL_RANGE, keyspaceName, tableNames2); List assignments = Arrays.asList(assignment1, assignment2); RepairAssignment result = RepairRangeSplitter.merge(assignments); - assertEquals(fullRange, result.getTokenRange()); + assertEquals(FULL_RANGE, result.getTokenRange()); assertEquals(keyspaceName, result.getKeyspaceName()); assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); } @@ -261,7 +291,7 @@ public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { @Test(expected = IllegalStateException.class) public void testMergeDifferentTokenRange() { // Test merging assignments with different token ranges - Iterator> range = AutoRepairUtils.split(fullRange, 2).iterator(); // Split the full range into two ranges ie (0-100, 100-200 + Iterator> range = AutoRepairUtils.split(FULL_RANGE, 2).iterator(); // Split the full range into two ranges ie (0-100, 100-200 Range tokenRange1 = range.next(); Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); @@ -281,8 +311,8 @@ public void testMergeDifferentKeyspaceName() { // Test merging assignments with different keyspace names List tableNames = Arrays.asList("table1", "table2"); - RepairAssignment assignment1 = new RepairAssignment(fullRange, "keyspace1", tableNames); - RepairAssignment assignment2 = new RepairAssignment(fullRange, "keyspace2", tableNames); + RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, "keyspace1", tableNames); + RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, "keyspace2", tableNames); List assignments = Arrays.asList(assignment1, assignment2); RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException @@ -295,8 +325,8 @@ public void testMergeWithDuplicateTables() { List tableNames1 = Arrays.asList("table1", "table2"); List tableNames2 = Arrays.asList("table2", "table3"); - RepairAssignment assignment1 = new RepairAssignment(fullRange, keyspaceName, 
tableNames1); - RepairAssignment assignment2 = new RepairAssignment(fullRange, keyspaceName, tableNames2); + RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames1); + RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames2); List assignments = Arrays.asList(assignment1, assignment2); RepairAssignment result = RepairRangeSplitter.merge(assignments); @@ -322,9 +352,15 @@ private List createAndInsertTables(int count) throws Throwable { } private void insertAndFlushTable(String tableName) throws Throwable { - executeFormattedQuery("INSERT INTO " + KEYSPACE + '.' + tableName + " (k, v) values (?, ?)", 1, 1); + insertAndFlushTable(tableName, 1); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); + } + private void insertAndFlushTable(String tableName, int... vals) throws Throwable { + for (int i : vals) + { + executeFormattedQuery("INSERT INTO " + KEYSPACE + '.' + tableName + " (k, v) values (?, ?)", i, i); + } ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); - assertEquals(1, cfs.getLiveSSTables().size()); } } From 2a70b238f0514c74d1ae6a1a0ad85806ddfebc85 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 10 Nov 2024 14:29:56 -0600 Subject: [PATCH 073/257] Add license header to RepairRangeSplitter --- .../repair/autorepair/RepairRangeSplitter.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index ffb104a53686..8b0f2f8e3dd9 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.cassandra.repair.autorepair; import java.io.IOException; From 8868074c6935cd9baaadf4aa8a1ec0a7f0b24dc2 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 10 Nov 2024 15:19:24 -0600 Subject: [PATCH 074/257] Simplify access of repair_type_overrides, key by String A lot of care existed to ensure each RepairType was populated in repair_type_overrides (using ensureOverrides). This could be simplified quite a bit by protecting access to repair_type_overrides by enclosing it in a getOptions method that ensures the provided RepairType has a populated entry. Additionally, change the key type to String and use ConcurrentMap; otherwise the change to make the enum names all caps does not work. 
--- .../repair/autorepair/AutoRepairConfig.java | 126 ++++++++++-------- .../autorepair/AutoRepairConfigTest.java | 40 +++--- 2 files changed, 85 insertions(+), 81 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 4a06d78c6eb6..b84ee5f2f652 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -19,17 +19,16 @@ package org.apache.cassandra.repair.autorepair; import java.io.Serializable; -import java.util.ArrayList; import java.util.Collections; -import java.util.EnumMap; import java.util.HashSet; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentMap; import java.util.function.Function; +import javax.annotation.Nonnull; + import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.ParameterizedClass; @@ -62,6 +61,27 @@ public enum RepairType implements Serializable INCREMENTAL, PREVIEW_REPAIRED; + private final String configName; + + RepairType() + { + this.configName = name().toLowerCase(); + } + + RepairType(String configName) + { + this.configName = configName; + } + + /** + * @return Format of the repair type as it should be represented in configuration. + * Canonically this is the enum name in lowerCase. 
+ */ + public String getConfigName() + { + return configName; + } + public static AutoRepairState getAutoRepairState(RepairType repairType) { switch (repairType) @@ -78,8 +98,9 @@ public static AutoRepairState getAutoRepairState(RepairType repairType) } } - // repair_type_overrides overrides the global_settings for a specific repair type - public volatile Map repair_type_overrides = new EnumMap<>(RepairType.class); + // repair_type_overrides overrides the global_settings for a specific repair type. String used as key instead + // of enum to allow lower case key in yaml. + public volatile ConcurrentMap repair_type_overrides = Maps.newConcurrentMap(); public AutoRepairConfig() { @@ -90,10 +111,6 @@ public AutoRepairConfig(boolean enabled) { this.enabled = enabled; global_settings = Options.getDefaultOptions(); - for (RepairType type : RepairType.values()) - { - repair_type_overrides.put(type, new Options()); - } } public DurationSpec.IntSecondsBound getRepairCheckInterval() @@ -149,14 +166,12 @@ public boolean isAutoRepairEnabled(RepairType repairType) public void setAutoRepairEnabled(RepairType repairType, boolean enabled) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).enabled = enabled; + getOptions(repairType).enabled = enabled; } public void setRepairByKeyspace(RepairType repairType, boolean repairByKeyspace) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).repair_by_keyspace = repairByKeyspace; + getOptions(repairType).repair_by_keyspace = repairByKeyspace; } public boolean getRepairByKeyspace(RepairType repairType) @@ -171,8 +186,7 @@ public int getRepairThreads(RepairType repairType) public void setRepairThreads(RepairType repairType, int repairThreads) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).number_of_repair_threads = repairThreads; + getOptions(repairType).number_of_repair_threads = repairThreads; } public int getRepairSubRangeNum(RepairType repairType) @@ -182,8 +196,7 @@ 
public int getRepairSubRangeNum(RepairType repairType) public void setRepairSubRangeNum(RepairType repairType, int repairSubRanges) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).number_of_subranges = repairSubRanges; + getOptions(repairType).number_of_subranges = repairSubRanges; } public DurationSpec.IntSecondsBound getRepairMinInterval(RepairType repairType) @@ -193,8 +206,7 @@ public DurationSpec.IntSecondsBound getRepairMinInterval(RepairType repairType) public void setRepairMinInterval(RepairType repairType, String minRepairInterval) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).min_repair_interval = new DurationSpec.IntSecondsBound(minRepairInterval); + getOptions(repairType).min_repair_interval = new DurationSpec.IntSecondsBound(minRepairInterval); } public int getRepairSSTableCountHigherThreshold(RepairType repairType) @@ -204,8 +216,7 @@ public int getRepairSSTableCountHigherThreshold(RepairType repairType) public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).sstable_upper_threshold = sstableHigherThreshold; + getOptions(repairType).sstable_upper_threshold = sstableHigherThreshold; } public DurationSpec.IntSecondsBound getAutoRepairTableMaxRepairTime(RepairType repairType) @@ -215,8 +226,7 @@ public DurationSpec.IntSecondsBound getAutoRepairTableMaxRepairTime(RepairType r public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(autoRepairTableMaxRepairTime); + getOptions(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(autoRepairTableMaxRepairTime); } public Set getIgnoreDCs(RepairType repairType) @@ -226,8 +236,7 @@ public Set getIgnoreDCs(RepairType repairType) public void 
setIgnoreDCs(RepairType repairType, Set ignoreDCs) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).ignore_dcs = ignoreDCs; + getOptions(repairType).ignore_dcs = ignoreDCs; } public boolean getRepairPrimaryTokenRangeOnly(RepairType repairType) @@ -237,8 +246,7 @@ public boolean getRepairPrimaryTokenRangeOnly(RepairType repairType) public void setRepairPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).repair_primary_token_range_only = primaryTokenRangeOnly; + getOptions(repairType).repair_primary_token_range_only = primaryTokenRangeOnly; } public int getParallelRepairPercentage(RepairType repairType) @@ -248,8 +256,7 @@ public int getParallelRepairPercentage(RepairType repairType) public void setParallelRepairPercentage(RepairType repairType, int percentage) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).parallel_repair_percentage = percentage; + getOptions(repairType).parallel_repair_percentage = percentage; } public int getParallelRepairCount(RepairType repairType) @@ -259,8 +266,7 @@ public int getParallelRepairCount(RepairType repairType) public void setParallelRepairCount(RepairType repairType, int count) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).parallel_repair_count = count; + getOptions(repairType).parallel_repair_count = count; } public boolean getMVRepairEnabled(RepairType repairType) @@ -270,14 +276,12 @@ public boolean getMVRepairEnabled(RepairType repairType) public void setMVRepairEnabled(RepairType repairType, boolean enabled) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).mv_repair_enabled = enabled; + getOptions(repairType).mv_repair_enabled = enabled; } public void setForceRepairNewNode(RepairType repairType, boolean forceRepairNewNode) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).force_repair_new_node = 
forceRepairNewNode; + getOptions(repairType).force_repair_new_node = forceRepairNewNode; } public boolean getForceRepairNewNode(RepairType repairType) @@ -292,8 +296,7 @@ public ParameterizedClass getTokenRangeSplitter(RepairType repairType) public void setInitialSchedulerDelay(RepairType repairType, String initialSchedulerDelay) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).initial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); + getOptions(repairType).initial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); } public DurationSpec.IntSecondsBound getInitialSchedulerDelay(RepairType repairType) @@ -308,8 +311,7 @@ public DurationSpec.IntSecondsBound getRepairSessionTimeout(RepairType repairTyp public void setRepairSessionTimeout(RepairType repairType, String repairSessionTimeout) { - ensureOverrides(repairType); - repair_type_overrides.get(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); + getOptions(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); } // Options configures auto-repair behavior for a given repair type. 
@@ -438,31 +440,37 @@ public String toString() } } - @VisibleForTesting - protected T applyOverrides(RepairType repairType, Function optionSupplier) + @Nonnull + protected Options getOptions(RepairType repairType) { - ArrayList optsProviders = new ArrayList<>(); - if (repair_type_overrides != null) - { - optsProviders.add(repair_type_overrides.get(repairType)); - } - optsProviders.add(global_settings); - optsProviders.add(Options.defaultOptions); + return repair_type_overrides.computeIfAbsent(repairType.getConfigName(), k -> new Options()); + } - return optsProviders.stream() - .map(opt -> Optional.ofNullable(opt).map(optionSupplier).orElse(null)) - .filter(Objects::nonNull) - .findFirst() - .orElse(null); + private static T getOverride(Options options, Function optionSupplier) + { + return options != null ? optionSupplier.apply(options) : null; } - protected void ensureOverrides(RepairType repairType) + @VisibleForTesting + protected T applyOverrides(RepairType repairType, Function optionSupplier) { - if (repair_type_overrides == null) + // Check option by repair type first + Options repairTypeOverrides = getOptions(repairType); + T val = optionSupplier.apply(repairTypeOverrides); + + if (val != null) + return val; + + // Check option in global settings + if (global_settings != null) { - repair_type_overrides = new EnumMap<>(RepairType.class); + val = getOverride(global_settings, optionSupplier); + + if (val != null) + return val; } - repair_type_overrides.computeIfAbsent(repairType, k -> new Options()); + // Otherwise check defaults + return getOverride(Options.defaultOptions, optionSupplier); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 4f1dd029ef5e..08ae9631a192 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ 
-18,7 +18,6 @@ package org.apache.cassandra.repair.autorepair; -import java.util.EnumMap; import java.util.Objects; import java.util.Collections; import java.util.Set; @@ -63,7 +62,6 @@ public static Object[] repairTypes() public void setUp() { config = new AutoRepairConfig(true); - config.repair_type_overrides = null; AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); } @@ -80,7 +78,7 @@ public void autoRepairConfigRepairTypesAreNotNull() AutoRepairConfig config = new AutoRepairConfig(); for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - assertNotNull(config.repair_type_overrides.get(repairType)); + assertNotNull(config.getOptions(repairType)); } } @@ -105,10 +103,8 @@ public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() { config.global_settings.enabled = true; - config.repair_type_overrides = new EnumMap<>(AutoRepairConfig.RepairType.class); - config.repair_type_overrides.put(repairType, new Options()); - config.repair_type_overrides.get(repairType).enabled = false; - assertFalse(config.isAutoRepairEnabled(repairType)); + config.setAutoRepairEnabled(repairType, false); + assertFalse(config.getOptions(repairType).enabled); } @Test @@ -118,7 +114,7 @@ public void testSetAutoRepairEnabledNoMVOrCDC() DatabaseDescriptor.setMaterializedViewsEnabled(false); config.setAutoRepairEnabled(repairType, true); - assertTrue(config.repair_type_overrides.get(repairType).enabled); + assertTrue(config.getOptions(repairType).enabled); } @Test @@ -126,7 +122,7 @@ public void testSetRepairByKeyspace() { config.setRepairByKeyspace(repairType, true); - assertTrue(config.repair_type_overrides.get(repairType).repair_by_keyspace); + assertTrue(config.getOptions(repairType).repair_by_keyspace); } @Test @@ -144,7 +140,7 @@ public void testSetRepairThreads() { config.setRepairThreads(repairType, 5); - assert 
config.repair_type_overrides.get(repairType).number_of_repair_threads == 5; + assert config.getOptions(repairType).number_of_repair_threads == 5; } @Test @@ -172,7 +168,7 @@ public void testSetRepairSubRangeNum() { config.setRepairSubRangeNum(repairType, 5); - assert config.repair_type_overrides.get(repairType).number_of_subranges == 5; + assert config.getOptions(repairType).number_of_subranges == 5; } @Test @@ -190,7 +186,7 @@ public void testSetRepairMinFrequencyInHours() { config.setRepairMinInterval(repairType, "5s"); - assert config.repair_type_overrides.get(repairType).min_repair_interval.toSeconds() == 5; + assert config.getOptions(repairType).min_repair_interval.toSeconds() == 5; } @Test @@ -226,7 +222,7 @@ public void testSetRepairSSTableCountHigherThreshold() { config.setRepairSSTableCountHigherThreshold(repairType, 5); - assert config.repair_type_overrides.get(repairType).sstable_upper_threshold == 5; + assert config.getOptions(repairType).sstable_upper_threshold == 5; } @Test @@ -244,7 +240,7 @@ public void testSetAutoRepairTableMaxRepairTimeInSec() { config.setAutoRepairTableMaxRepairTime(repairType, "5s"); - assert config.repair_type_overrides.get(repairType).table_max_repair_time.toSeconds() == 5; + assert config.getOptions(repairType).table_max_repair_time.toSeconds() == 5; } @Test @@ -262,7 +258,7 @@ public void testSetIgnoreDCs() { config.setIgnoreDCs(repairType, testSet); - assertEquals(config.repair_type_overrides.get(repairType).ignore_dcs, testSet); + assertEquals(config.getOptions(repairType).ignore_dcs, testSet); } @Test @@ -280,7 +276,7 @@ public void testSetRepairPrimaryTokenRangeOnly() { config.setRepairPrimaryTokenRangeOnly(repairType, true); - assertTrue(config.repair_type_overrides.get(repairType).repair_primary_token_range_only); + assertTrue(config.getOptions(repairType).repair_primary_token_range_only); } @Test @@ -298,7 +294,7 @@ public void testSetParallelRepairPercentageInGroup() { config.setParallelRepairPercentage(repairType, 
5); - assert config.repair_type_overrides.get(repairType).parallel_repair_percentage == 5; + assert config.getOptions(repairType).parallel_repair_percentage == 5; } @Test @@ -316,7 +312,7 @@ public void testSetParallelRepairCountInGroup() { config.setParallelRepairCount(repairType, 5); - assert config.repair_type_overrides.get(repairType).parallel_repair_count == 5; + assert config.getOptions(repairType).parallel_repair_count == 5; } @Test @@ -334,7 +330,7 @@ public void testSetMVRepairEnabled() { config.setMVRepairEnabled(repairType, true); - assertTrue(config.repair_type_overrides.get(repairType).mv_repair_enabled); + assertTrue(config.getOptions(repairType).mv_repair_enabled); } @Test @@ -342,7 +338,7 @@ public void testSetForceRepairNewNode() { config.setForceRepairNewNode(repairType, true); - assertTrue(config.repair_type_overrides.get(repairType).force_repair_new_node); + assertTrue(config.getOptions(repairType).force_repair_new_node); } @Test @@ -403,7 +399,7 @@ public void testSetInitialSchedulerDelay() { config.setInitialSchedulerDelay(repairType, "5s"); - assert config.repair_type_overrides.get(repairType).initial_scheduler_delay.toSeconds() == 5; + assert config.getOptions(repairType).initial_scheduler_delay.toSeconds() == 5; } @Test @@ -421,7 +417,7 @@ public void testSetRepairSessionTimeout() { config.setRepairSessionTimeout(repairType, "1h"); - assert config.repair_type_overrides.get(repairType).repair_session_timeout.toSeconds() == 3600; + assert config.getOptions(repairType).repair_session_timeout.toSeconds() == 3600; } } From 6d473100dd85907e7b193bf1cff381d82104964d Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 10 Nov 2024 22:39:07 -0600 Subject: [PATCH 075/257] Allow partial repair of range, factor in total bytes for IR * Rename maxBytesPerScheduleBytes to maxBytesPerSchedule * Allow partial repair of range if the smallest subrange we can repair is less than the remaining bytes we can repair. 
* Consider totalSize for IR (because of anticompaction) and sizeInRange for full repair. * Increased logging while debugging. --- .../autorepair/RepairRangeSplitter.java | 104 ++++++++++++++---- .../autorepair/RepairRangeSplitterTest.java | 6 +- 2 files changed, 83 insertions(+), 27 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 8b0f2f8e3dd9..a6b138a85996 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -51,6 +50,7 @@ import org.apache.cassandra.io.sstable.format.big.BigTableScanner; import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; import org.apache.cassandra.io.sstable.metadata.MetadataType; +import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.concurrent.Refs; @@ -90,7 +90,7 @@ public class RepairRangeSplitter implements IAutoRepairTokenRangeSplitter static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; private final int tablesPerAssignmentLimit; - private final long maxBytesPerScheduleBytes; + private final long maxBytesPerSchedule; private final long bytesPerSubrange; private final long partitionsPerSubrange; @@ -115,11 +115,11 @@ public RepairRangeSplitter(Map parameters) if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) { - maxBytesPerScheduleBytes = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)).toBytes(); + maxBytesPerSchedule = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)).toBytes(); } else { - maxBytesPerScheduleBytes = 
DEFAULT_MAX_BYTES_PER_SCHEDULE; + maxBytesPerSchedule = DEFAULT_MAX_BYTES_PER_SCHEDULE; } if (parameters.containsKey(PARTITION_COUNT)) @@ -141,7 +141,7 @@ public RepairRangeSplitter(Map parameters) } logger.info("Configured {} with {}={}, {}={}, {}={}, {}={}", RepairRangeSplitter.class.getName(), - SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerScheduleBytes, + SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule, PARTITION_COUNT, partitionsPerSubrange, TABLE_BATCH_LIMIT, tablesPerAssignmentLimit); } @@ -234,7 +234,10 @@ public List getRepairAssignmentsForTable(AutoRepairConfig.Repa Collections.shuffle(sizeEstimates); for (SizeEstimate estimate : sizeEstimates) { - if (estimate.sizeInRange == 0) + // Calculate the smallest subrange if we were to split, this can be used to determine if we can do any + // work if we have a non-zero estimate. + long smallestSubrange = Math.min(estimate.sizeForRepair, bytesPerSubrange); + if (estimate.sizeForRepair == 0) { ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); @@ -249,31 +252,40 @@ public List getRepairAssignmentsForTable(AutoRepairConfig.Repa logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", keyspaceName, tableName, estimate.tokenRange); } } - else if (targetBytesSoFar + estimate.sizeInRange < maxBytesPerScheduleBytes) + else if (targetBytesSoFar + smallestSubrange < maxBytesPerSchedule) { - targetBytesSoFar += estimate.sizeInRange; // Check if the estimate needs splitting based on the criteria - boolean needsSplitting = estimate.sizeInRange > bytesPerSubrange || estimate.partitions > partitionsPerSubrange; + boolean needsSplitting = estimate.sizeForRepair > bytesPerSubrange || estimate.partitions > partitionsPerSubrange; if (needsSplitting) { - // Calculate the number of splits needed for size and partitions - int 
splitsForSize = (int) Math.ceil((double) estimate.sizeInRange / bytesPerSubrange); - int splitsForPartitions = (int) Math.ceil((double) estimate.partitions / partitionsPerSubrange); - - // Choose the larger of the two as the number of splits - int numberOfSplits = Math.max(splitsForSize, splitsForPartitions); - - // Split the token range into subranges + int numberOfSplits = calculateNumberOfSplits(estimate); + long approximateBytesPerSplit = estimate.sizeForRepair / numberOfSplits; Collection> subranges = split(estimate.tokenRange, numberOfSplits); for (Range subrange : subranges) { + if (targetBytesSoFar + approximateBytesPerSplit > maxBytesPerSchedule) + { + logger.warn("Refusing to add repair assignment for {}.{} for subrange {} with a approximateBytesPerSplit={} because it would increase total repair bytes to {} which is greater than {}={}", + keyspaceName, tableName, subrange, FileUtils.stringifyFileSize(approximateBytesPerSplit), FileUtils.stringifyFileSize(targetBytesSoFar + approximateBytesPerSplit), + MAX_BYTES_PER_SCHEDULE, FileUtils.stringifyFileSize(maxBytesPerSchedule)); + break; + } + logger.info("Added repair assignment for {}.{} for subrange {} (#{}/{})", + keyspaceName, tableName, subrange, repairAssignments.size() + 1, numberOfSplits); RepairAssignment assignment = new RepairAssignment(subrange, keyspaceName, Collections.singletonList(tableName)); repairAssignments.add(assignment); + targetBytesSoFar += approximateBytesPerSplit; } } else { + logger.info("Using 1 repair assignment for {}.{} for range {} as rangeBytes={} is less than {}={} and partitionEstimate={} is less than {}={}", + keyspaceName, tableName, estimate.tokenRange, + FileUtils.stringifyFileSize(estimate.sizeForRepair), + SUBRANGE_SIZE, FileUtils.stringifyFileSize(bytesPerSubrange), + estimate.partitions, + PARTITION_COUNT, partitionsPerSubrange); // No splitting needed, repair the entire range as-is RepairAssignment assignment = new RepairAssignment(estimate.tokenRange, 
keyspaceName, Collections.singletonList(tableName)); repairAssignments.add(assignment); @@ -281,13 +293,45 @@ else if (targetBytesSoFar + estimate.sizeInRange < maxBytesPerScheduleBytes) } else { + long calculatedSize = targetBytesSoFar + estimate.sizeForRepair; // Really this is "Ok" but it does mean we are relying on randomness to cover the other ranges - logger.info("Skipping range {} for {}.{} as it would exceed maxBytesPerScheduleBytes, consider increasing maxBytesPerScheduleBytes, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds", estimate.tokenRange, keyspaceName, tableName); + logger.info("Skipping range {} for {}.{} as {} would exceed {}={}, " + + "consider increasing {}, reducing node denity or monitoring to ensure all ranges do get repaired within gc_grace_seconds", + estimate.tokenRange, + keyspaceName, tableName, + FileUtils.stringifyFileSize(calculatedSize), + MAX_BYTES_PER_SCHEDULE, FileUtils.stringifyFileSize(calculatedSize), MAX_BYTES_PER_SCHEDULE); } } return repairAssignments; } + private int calculateNumberOfSplits(SizeEstimate estimate) + { + // Calculate the number of splits needed for size and partitions + int splitsForSize = (int) Math.ceil((double) estimate.sizeForRepair / bytesPerSubrange); + int splitsForPartitions = (int) Math.ceil((double) estimate.partitions / partitionsPerSubrange); + + // Split the token range into subranges based on whichever (partitions, bytes) would generate the most splits. + boolean splitBySize = splitsForSize > splitsForPartitions; + int splits = splitBySize ? 
splitsForSize : splitsForPartitions; + + // calculate approximation for logging purposes + long approximateBytesPerSplit = estimate.sizeForRepair / splits; + long approximatePartitionsPerSplit = estimate.partitions / splits; + + logger.info("Splitting {}.{} for range {} into {} sub ranges by {} (splitsForSize={}, splitsForPartitions={}, " + + "approximateBytesInRange={}, approximatePartitionsInRange={}, " + + "approximateBytesPerSplit={}, approximatePartitionsPerSplit={})", + estimate.keyspace, estimate.table, estimate.tokenRange, + splits, splitBySize ? "size" : "partitions", + splitsForSize, splitsForPartitions, + FileUtils.stringifyFileSize(estimate.sizeForRepair), estimate.partitions, + FileUtils.stringifyFileSize(approximateBytesPerSplit), approximatePartitionsPerSplit + ); + return splits; + } + public Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) { // Collect all applicable token ranges @@ -318,7 +362,7 @@ private List getRangeSizeEstimate(AutoRepairConfig.RepairType repa logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) { - SizeEstimate estimate = getSizesForRangeOfSSTables(keyspace, table, tokenRange, refs); + SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); logger.debug("Size estimate for {}.{} for range {} is {}", keyspace, table, tokenRange, estimate); sizeEstimates.add(estimate); } @@ -327,7 +371,7 @@ private List getRangeSizeEstimate(AutoRepairConfig.RepairType repa } @VisibleForTesting - static SizeEstimate getSizesForRangeOfSSTables(String keyspace, String table, Range tokenRange, Refs refs) + static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repairType, String keyspace, String table, Range tokenRange, Refs refs) { ICardinality cardinality = new HyperLogLogPlus(13, 25); long approxBytesInRange = 0L; @@ -374,7 +418,7 @@ static 
SizeEstimate getSizesForRangeOfSSTables(String keyspace, String table, Ra double ratio = approxBytesInRange / (double) totalBytes; // use the ratio from size to estimate the partitions in the range as well long partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); - return new SizeEstimate(keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); + return new SizeEstimate(repairType, keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); } @VisibleForTesting @@ -397,7 +441,7 @@ static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repa List canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) { - canonicalSSTables = canonicalSSTables.stream().filter(SSTableReader::isRepaired).collect(Collectors.toList()); + canonicalSSTables = canonicalSSTables.stream().filter((sstable) -> !sstable.isRepaired()).collect(Collectors.toList()); } refs = Refs.tryRef(canonicalSSTables); } @@ -407,21 +451,33 @@ static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repa @VisibleForTesting protected static class SizeEstimate { + public final AutoRepairConfig.RepairType repairType; public final String keyspace; public final String table; public final Range tokenRange; public final long partitions; public final long sizeInRange; public final long totalSize; - - public SizeEstimate(String keyspace, String table, Range tokenRange, long partitions, long sizeInRange, long totalSize) + /** + * Size to consider in the repair. For incremental repair, we want to consider the total size + * of the estimate as we have to factor in anticompacting the entire SSTable. + * For full repair, just use the size containing the range. 
+ */ + public final long sizeForRepair; + + public SizeEstimate(AutoRepairConfig.RepairType repairType, + String keyspace, String table, Range tokenRange, + long partitions, long sizeInRange, long totalSize) { + this.repairType = repairType; this.keyspace = keyspace; this.table = table; this.tokenRange = tokenRange; this.partitions = partitions; this.sizeInRange = sizeInRange; this.totalSize = totalSize; + + this.sizeForRepair = repairType == AutoRepairConfig.RepairType.INCREMENTAL ? totalSize : sizeInRange; } } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index aa81e75300cf..71b60a92232e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -70,7 +70,7 @@ public void testSizePartitionCount() throws Throwable insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); Refs sstables = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); - RepairRangeSplitter.SizeEstimate sizes = RepairRangeSplitter.getSizesForRangeOfSSTables(KEYSPACE, tableName, FULL_RANGE, sstables); + RepairRangeSplitter.SizeEstimate sizes = RepairRangeSplitter.getSizesForRangeOfSSTables(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); assertEquals(10, sizes.partitions); } @@ -88,8 +88,8 @@ public void testSizePartitionCountSplit() throws Throwable Refs sstables1 = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange1); Refs sstables2 = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange2); - RepairRangeSplitter.SizeEstimate sizes1 = 
RepairRangeSplitter.getSizesForRangeOfSSTables(KEYSPACE, tableName, tokenRange1, sstables1); - RepairRangeSplitter.SizeEstimate sizes2 = RepairRangeSplitter.getSizesForRangeOfSSTables(KEYSPACE, tableName, tokenRange2, sstables2); + RepairRangeSplitter.SizeEstimate sizes1 = RepairRangeSplitter.getSizesForRangeOfSSTables(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); + RepairRangeSplitter.SizeEstimate sizes2 = RepairRangeSplitter.getSizesForRangeOfSSTables(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); // +-5% because HLL merge and the applying of range size approx ratio causes estimation errors assertTrue(Math.abs(10000 - (sizes1.partitions + sizes2.partitions)) <= 60); } From 61e7a3d2ef18bbce32c3be673cf13a0a24a2f603 Mon Sep 17 00:00:00 2001 From: Chris Lohfink Date: Thu, 14 Nov 2024 11:41:51 -0600 Subject: [PATCH 076/257] update warnings based on repair type --- .../autorepair/RepairRangeSplitter.java | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index a6b138a85996..3fbc2239f0c8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -101,7 +101,6 @@ public class RepairRangeSplitter implements IAutoRepairTokenRangeSplitter public RepairRangeSplitter(Map parameters) { - // Demonstrates parameterizing a range splitter so we can have splitter specific options. 
DataStorageSpec.LongBytesBound subrangeSize; if (parameters.containsKey(SUBRANGE_SIZE)) { @@ -175,7 +174,7 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy if (tableAssignments.isEmpty()) continue; - // If the table assignments are for the same token range and we have room to add more tables to the current assignment + // If the table assignments are for the same token range, and we have room to add more tables to the current assignment if (tableAssignments.size() == 1 && currentAssignments.size() < tablesPerAssignmentLimit && (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) { @@ -229,7 +228,7 @@ public List getRepairAssignmentsForTable(AutoRepairConfig.Repa long targetBytesSoFar = 0; List sizeEstimates = getRangeSizeEstimate(repairType, keyspaceName, tableName, tokenRanges); - // since its possible for us to hit maxBytesPerScheduleBytes before seeing all ranges, shuffle so there is chance + // since its possible for us to hit maxBytesPerSchedule before seeing all ranges, shuffle so there is chance // at least of hitting all the ranges _eventually_ for the worst case scenarios Collections.shuffle(sizeEstimates); for (SizeEstimate estimate : sizeEstimates) @@ -266,9 +265,7 @@ else if (targetBytesSoFar + smallestSubrange < maxBytesPerSchedule) { if (targetBytesSoFar + approximateBytesPerSplit > maxBytesPerSchedule) { - logger.warn("Refusing to add repair assignment for {}.{} for subrange {} with a approximateBytesPerSplit={} because it would increase total repair bytes to {} which is greater than {}={}", - keyspaceName, tableName, subrange, FileUtils.stringifyFileSize(approximateBytesPerSplit), FileUtils.stringifyFileSize(targetBytesSoFar + approximateBytesPerSplit), - MAX_BYTES_PER_SCHEDULE, FileUtils.stringifyFileSize(maxBytesPerSchedule)); + warnMaxBytesPerSchedule(repairType, keyspaceName, tableName); break; } logger.info("Added repair assignment for {}.{} for subrange {} 
(#{}/{})", @@ -293,19 +290,23 @@ else if (targetBytesSoFar + smallestSubrange < maxBytesPerSchedule) } else { - long calculatedSize = targetBytesSoFar + estimate.sizeForRepair; // Really this is "Ok" but it does mean we are relying on randomness to cover the other ranges - logger.info("Skipping range {} for {}.{} as {} would exceed {}={}, " + - "consider increasing {}, reducing node denity or monitoring to ensure all ranges do get repaired within gc_grace_seconds", - estimate.tokenRange, - keyspaceName, tableName, - FileUtils.stringifyFileSize(calculatedSize), - MAX_BYTES_PER_SCHEDULE, FileUtils.stringifyFileSize(calculatedSize), MAX_BYTES_PER_SCHEDULE); + warnMaxBytesPerSchedule(repairType, keyspaceName, tableName); } } return repairAssignments; } + private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) + { + String warning = "Refusing to add repair assignment for {}.{} because it would increase total repair bytes greater {}"; + if (repairType == AutoRepairConfig.RepairType.FULL) + { + warning = warning + ", everything will not be repaired this schedule. Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds"; + } + logger.warn(warning, keyspaceName, tableName, FileUtils.stringifyFileSize(maxBytesPerSchedule)); + } + private int calculateNumberOfSplits(SizeEstimate estimate) { // Calculate the number of splits needed for size and partitions From 39477958120b4553acb51467ddd5932e4d5826af Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Tue, 26 Nov 2024 22:27:11 -0600 Subject: [PATCH 077/257] Move size comparison up so priority can be factored in Rather than skipping assignments based on size as they are generated, generate all assignments. This way we can use table priorities as a guide for which assignments to exclude. 
There is still some more work to do here, as we are still splitting by max bytes, where we should really split by remaining bytes. Some further refactoring can be done here but I think this will put more emphasis on table priority when selecting repair assignments and the size of the repair is greater than max schedule bytes. --- .../IAutoRepairTokenRangeSplitter.java | 2 +- .../autorepair/RepairRangeSplitter.java | 176 ++++++++++++++---- .../autorepair/RepairRangeSplitterTest.java | 39 ++-- 3 files changed, 158 insertions(+), 59 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index a4760bc3f94f..d43a9f562594 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -54,7 +54,7 @@ public interface IAutoRepairTokenRangeSplitter * The priority is determined using the {@link RepairAssignment#getPriority(AutoRepairConfig.RepairType)} method. 
*/ @VisibleForTesting - default void reorderByPriority(List repairAssignments, AutoRepairConfig.RepairType repairType) + default void reorderByPriority(List repairAssignments, AutoRepairConfig.RepairType repairType) { repairAssignments.sort(Comparator.comparingInt(a -> ((RepairAssignment) a).getPriority(repairType)).reversed()); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 3fbc2239f0c8..e4822adcfb13 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -139,6 +140,12 @@ public RepairRangeSplitter(Map parameters) tablesPerAssignmentLimit = DEFAULT_TABLE_BATCH_LIMIT; } + if (bytesPerSubrange > maxBytesPerSchedule) + { + throw new RuntimeException(String.format("bytesPerSubrange '%s' cannot be greater than maxBytesPerSchedule '%s'", + FileUtils.stringifyFileSize(bytesPerSubrange), FileUtils.stringifyFileSize(maxBytesPerSchedule))); + } + logger.info("Configured {} with {}={}, {}={}, {}={}, {}={}", RepairRangeSplitter.class.getName(), SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule, PARTITION_COUNT, partitionsPerSubrange, TABLE_BATCH_LIMIT, tablesPerAssignmentLimit); @@ -155,9 +162,9 @@ public List getRepairAssignments(AutoRepairConfig.RepairType r @VisibleForTesting List getRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) { - List repairAssignments = new ArrayList<>(); + List repairAssignments = new ArrayList<>(); // this is used for batching minimal single assignment tables together - List currentAssignments = new ArrayList<>(); + List currentAssignments = new 
ArrayList<>(); // sort the tables by size so can batch the smallest ones together tableNames.sort((t1, t2) -> { @@ -167,18 +174,30 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, t1)); return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); }); + for (String tableName : tableNames) { - List tableAssignments = getRepairAssignmentsForTable(repairType, keyspaceName, tableName, tokenRanges); + List tableAssignments = getRepairAssignmentsForTable(repairType, keyspaceName, tableName, tokenRanges); if (tableAssignments.isEmpty()) continue; // If the table assignments are for the same token range, and we have room to add more tables to the current assignment - if (tableAssignments.size() == 1 && currentAssignments.size() < tablesPerAssignmentLimit && + if (tableAssignments.size() == 1 && + currentAssignments.size() < tablesPerAssignmentLimit && (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) { - currentAssignments.addAll(tableAssignments); + long currentAssignmentsBytes = getBytesInAssignments(currentAssignments); + long tableAssignmentsBytes = getBytesInAssignments(tableAssignments); + // only add assignments together if they don't exceed max bytes per schedule. 
+ if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule) { + currentAssignments.addAll(tableAssignments); + } + else + { + // add table assignments by themselves + repairAssignments.addAll(tableAssignments); + } } else { @@ -193,12 +212,55 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy if (!currentAssignments.isEmpty()) repairAssignments.add(merge(currentAssignments)); + return filterRepairAssignments(repairType, repairAssignments); + } + + /** + * Given a repair type and list of sized-based repair assignments order them by priority of the + * assignments' underlying tables and confine them by maxBytesPerSchedule. + * @param repairType used to determine underyling table priorities + * @param repairAssignments the assignments to filter. + * @return A list of repair assignments ordered by priority and confined by maxBytesPerSchedule. + */ + protected List filterRepairAssignments(AutoRepairConfig.RepairType repairType, List repairAssignments) + { + // Reorder the repair assignments reorderByPriority(repairAssignments, repairType); - return repairAssignments; + + // Confine repair assignemnts by maxBytesPer Schedule if greater than maxBytesPerSchedule. + long assignmentBytes = getBytesInAssignments(repairAssignments); + if (assignmentBytes < maxBytesPerSchedule) + return repairAssignments.stream().map((a) -> (RepairAssignment)a).collect(Collectors.toList()); + else + { + long bytesSoFar = 0L; + List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); + for (SizedRepairAssignment repairAssignment : repairAssignments) { + // skip any repair assignments that would accumulate us past the maxBytesPerSchedule + if (bytesSoFar + repairAssignment.getEstimatedSizeInBytes() > maxBytesPerSchedule) + { + // log that repair assignment was skipped. 
+ warnMaxBytesPerSchedule(repairType, repairAssignment); + } + else + { + assignmentsToReturn.add(repairAssignment); + bytesSoFar += repairAssignment.getEstimatedSizeInBytes(); + } + } + return assignmentsToReturn; + } + } + + @VisibleForTesting + protected static long getBytesInAssignments(List repairAssignments) { + return repairAssignments + .stream() + .mapToLong(SizedRepairAssignment::getEstimatedSizeInBytes).sum(); } @VisibleForTesting - static RepairAssignment merge(List assignments) + static SizedRepairAssignment merge(List assignments) { if (assignments.isEmpty()) throw new IllegalStateException("Cannot merge empty assignments"); @@ -207,7 +269,7 @@ static RepairAssignment merge(List assignments) Range referenceTokenRange = assignments.get(0).getTokenRange(); String referenceKeyspaceName = assignments.get(0).getKeyspaceName(); - for (RepairAssignment assignment : assignments) + for (SizedRepairAssignment assignment : assignments) { // These checks _should_ be unnecessary but are here to ensure that the assignments are consistent if (!assignment.getTokenRange().equals(referenceTokenRange)) @@ -218,40 +280,43 @@ static RepairAssignment merge(List assignments) mergedTableNames.addAll(assignment.getTableNames()); } - return new RepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames)); + long sizeForAssignment = getBytesInAssignments(assignments); + return new SizedRepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames), sizeForAssignment); } - public List getRepairAssignmentsForTable(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Collection> tokenRanges) + @VisibleForTesting + protected List getRepairAssignmentsForTable(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Collection> tokenRanges) { - List repairAssignments = new ArrayList<>(); + List sizeEstimates = getRangeSizeEstimate(repairType, keyspaceName, tableName, 
tokenRanges); + return getRepairAssignments(sizeEstimates); + } - long targetBytesSoFar = 0; + @VisibleForTesting + protected List getRepairAssignments(List sizeEstimates) + { + List repairAssignments = new ArrayList<>(); - List sizeEstimates = getRangeSizeEstimate(repairType, keyspaceName, tableName, tokenRanges); // since its possible for us to hit maxBytesPerSchedule before seeing all ranges, shuffle so there is chance // at least of hitting all the ranges _eventually_ for the worst case scenarios Collections.shuffle(sizeEstimates); for (SizeEstimate estimate : sizeEstimates) { - // Calculate the smallest subrange if we were to split, this can be used to determine if we can do any - // work if we have a non-zero estimate. - long smallestSubrange = Math.min(estimate.sizeForRepair, bytesPerSubrange); if (estimate.sizeForRepair == 0) { - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(estimate.keyspace, estimate.table); long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); if (memtableSize > 0L) { - logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", keyspaceName, tableName, estimate.tokenRange, memtableSize); - RepairAssignment assignment = new RepairAssignment(estimate.tokenRange, keyspaceName, Collections.singletonList(tableName)); + logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), estimate.sizeInRange); repairAssignments.add(assignment); } else { - logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", keyspaceName, tableName, 
estimate.tokenRange); + logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", estimate.keyspace, estimate.table, estimate.tokenRange); } } - else if (targetBytesSoFar + smallestSubrange < maxBytesPerSchedule) + else { // Check if the estimate needs splitting based on the criteria boolean needsSplitting = estimate.sizeForRepair > bytesPerSubrange || estimate.partitions > partitionsPerSubrange; @@ -263,48 +328,37 @@ else if (targetBytesSoFar + smallestSubrange < maxBytesPerSchedule) Collection> subranges = split(estimate.tokenRange, numberOfSplits); for (Range subrange : subranges) { - if (targetBytesSoFar + approximateBytesPerSplit > maxBytesPerSchedule) - { - warnMaxBytesPerSchedule(repairType, keyspaceName, tableName); - break; - } logger.info("Added repair assignment for {}.{} for subrange {} (#{}/{})", - keyspaceName, tableName, subrange, repairAssignments.size() + 1, numberOfSplits); - RepairAssignment assignment = new RepairAssignment(subrange, keyspaceName, Collections.singletonList(tableName)); + estimate.keyspace, estimate.table, subrange, repairAssignments.size() + 1, numberOfSplits); + SizedRepairAssignment assignment = new SizedRepairAssignment(subrange, estimate.keyspace, Collections.singletonList(estimate.table), approximateBytesPerSplit); repairAssignments.add(assignment); - targetBytesSoFar += approximateBytesPerSplit; } } else { logger.info("Using 1 repair assignment for {}.{} for range {} as rangeBytes={} is less than {}={} and partitionEstimate={} is less than {}={}", - keyspaceName, tableName, estimate.tokenRange, + estimate.keyspace, estimate.table, estimate.tokenRange, FileUtils.stringifyFileSize(estimate.sizeForRepair), SUBRANGE_SIZE, FileUtils.stringifyFileSize(bytesPerSubrange), estimate.partitions, PARTITION_COUNT, partitionsPerSubrange); // No splitting needed, repair the entire range as-is - RepairAssignment assignment = new RepairAssignment(estimate.tokenRange, keyspaceName, 
Collections.singletonList(tableName)); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), estimate.sizeForRepair); repairAssignments.add(assignment); } } - else - { - // Really this is "Ok" but it does mean we are relying on randomness to cover the other ranges - warnMaxBytesPerSchedule(repairType, keyspaceName, tableName); - } } return repairAssignments; } - private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) + private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, SizedRepairAssignment repairAssignment) { - String warning = "Refusing to add repair assignment for {}.{} because it would increase total repair bytes greater {}"; + String warning = "Refusing to add repair assignment of size {} for {}.{} because it would increase total repair bytes to a value greater than {}"; if (repairType == AutoRepairConfig.RepairType.FULL) { warning = warning + ", everything will not be repaired this schedule. Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds"; } - logger.warn(warning, keyspaceName, tableName, FileUtils.stringifyFileSize(maxBytesPerSchedule)); + logger.warn(warning, repairAssignment.getEstimatedSizeInBytes(), repairAssignment.keyspaceName, repairAssignment.tableNames, FileUtils.stringifyFileSize(maxBytesPerSchedule)); } private int calculateNumberOfSplits(SizeEstimate estimate) @@ -481,4 +535,48 @@ public SizeEstimate(AutoRepairConfig.RepairType repairType, this.sizeForRepair = repairType == AutoRepairConfig.RepairType.INCREMENTAL ? 
totalSize : sizeInRange; } } + + @VisibleForTesting + protected static class SizedRepairAssignment extends RepairAssignment { + + final long estimatedSizedInBytes; + + public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, long estimatedSizeInBytes) + { + super(tokenRange, keyspaceName, tableNames); + this.estimatedSizedInBytes = estimatedSizeInBytes; + } + + public long getEstimatedSizeInBytes() { + return estimatedSizedInBytes; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + SizedRepairAssignment that = (SizedRepairAssignment) o; + return estimatedSizedInBytes == that.estimatedSizedInBytes; + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), estimatedSizedInBytes); + } + + @Override + public String toString() + { + return "SizedRepairAssignement{" + + "estimatedSizedInBytes=" + FileUtils.stringifyFileSize(estimatedSizedInBytes) + + ", keyspaceName='" + keyspaceName + '\'' + + ", tokenRange=" + tokenRange + + ", tableNames=" + tableNames + + '}'; + } + + } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 71b60a92232e..2873b2566b0f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -39,6 +39,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; +import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizedRepairAssignment; import org.apache.cassandra.utils.concurrent.Refs; import static 
org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; @@ -186,7 +187,7 @@ public void testReorderByPriorityWithOneElement() { @Test public void testGetRepairAssignmentsForTable_NoSSTables() { Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); assertEquals(0, assignments.size()); } @@ -194,7 +195,7 @@ public void testGetRepairAssignmentsForTable_NoSSTables() { public void testGetRepairAssignmentsForTable_Single() throws Throwable { Collection> ranges = Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); insertAndFlushSingleTable(tableName); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); assertEquals(1, assignments.size()); } @@ -250,7 +251,7 @@ public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { @Test(expected = IllegalStateException.class) public void testMergeEmptyAssignments() { // Test when the list of assignments is empty - List emptyAssignments = Collections.emptyList(); + List emptyAssignments = Collections.emptyList(); RepairRangeSplitter.merge(emptyAssignments); } @@ -260,10 +261,10 @@ public void testMergeSingleAssignment() { String keyspaceName = "testKeyspace"; List tableNames = Arrays.asList("table1", "table2"); - RepairAssignment assignment = new RepairAssignment(FULL_RANGE, keyspaceName, 
tableNames); - List assignments = Collections.singletonList(assignment); + SizedRepairAssignment assignment = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames, 0L); + List assignments = Collections.singletonList(assignment); - RepairAssignment result = RepairRangeSplitter.merge(assignments); + SizedRepairAssignment result = RepairRangeSplitter.merge(assignments); assertEquals(FULL_RANGE, result.getTokenRange()); assertEquals(keyspaceName, result.getKeyspaceName()); @@ -277,11 +278,11 @@ public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { List tableNames1 = Arrays.asList("table1", "table2"); List tableNames2 = Arrays.asList("table2", "table3"); - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames1); - RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames2); - List assignments = Arrays.asList(assignment1, assignment2); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1, 0L); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2, 0L); + List assignments = Arrays.asList(assignment1, assignment2); - RepairAssignment result = RepairRangeSplitter.merge(assignments); + SizedRepairAssignment result = RepairRangeSplitter.merge(assignments); assertEquals(FULL_RANGE, result.getTokenRange()); assertEquals(keyspaceName, result.getKeyspaceName()); @@ -299,9 +300,9 @@ public void testMergeDifferentTokenRange() { String keyspaceName = "testKeyspace"; List tableNames = Arrays.asList("table1", "table2"); - RepairAssignment assignment1 = new RepairAssignment(tokenRange1, keyspaceName, tableNames); - RepairAssignment assignment2 = new RepairAssignment(tokenRange2, keyspaceName, tableNames); - List assignments = Arrays.asList(assignment1, assignment2); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(tokenRange1, keyspaceName, tableNames, 0L); + SizedRepairAssignment 
assignment2 = new SizedRepairAssignment(tokenRange2, keyspaceName, tableNames, 0L); + List assignments = Arrays.asList(assignment1, assignment2); RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException } @@ -311,9 +312,9 @@ public void testMergeDifferentKeyspaceName() { // Test merging assignments with different keyspace names List tableNames = Arrays.asList("table1", "table2"); - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, "keyspace1", tableNames); - RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, "keyspace2", tableNames); - List assignments = Arrays.asList(assignment1, assignment2); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, "keyspace1", tableNames, 0L); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, "keyspace2", tableNames, 0L); + List assignments = Arrays.asList(assignment1, assignment2); RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException } @@ -325,9 +326,9 @@ public void testMergeWithDuplicateTables() { List tableNames1 = Arrays.asList("table1", "table2"); List tableNames2 = Arrays.asList("table2", "table3"); - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames1); - RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, keyspaceName, tableNames2); - List assignments = Arrays.asList(assignment1, assignment2); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1, 0L); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2, 0L); + List assignments = Arrays.asList(assignment1, assignment2); RepairAssignment result = RepairRangeSplitter.merge(assignments); From faed922210010c235176608bab8a65456ab7aa89 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:24:06 -0600 Subject: [PATCH 078/257] Various cleanup --- 
.../autorepair/RepairRangeSplitter.java | 70 +++++++++++++------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index e4822adcfb13..510edc36f719 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -142,8 +142,11 @@ public RepairRangeSplitter(Map parameters) if (bytesPerSubrange > maxBytesPerSchedule) { - throw new RuntimeException(String.format("bytesPerSubrange '%s' cannot be greater than maxBytesPerSchedule '%s'", - FileUtils.stringifyFileSize(bytesPerSubrange), FileUtils.stringifyFileSize(maxBytesPerSchedule))); + throw new IllegalArgumentException(String.format("%s='%s' cannot be greater than %s='%s'", + SUBRANGE_SIZE, + FileUtils.stringifyFileSize(bytesPerSubrange), + MAX_BYTES_PER_SCHEDULE, + FileUtils.stringifyFileSize(maxBytesPerSchedule))); } logger.info("Configured {} with {}={}, {}={}, {}={}, {}={}", RepairRangeSplitter.class.getName(), @@ -187,8 +190,8 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy currentAssignments.size() < tablesPerAssignmentLimit && (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) { - long currentAssignmentsBytes = getBytesInAssignments(currentAssignments); - long tableAssignmentsBytes = getBytesInAssignments(tableAssignments); + long currentAssignmentsBytes = getEstimatedBytes(currentAssignments); + long tableAssignmentsBytes = getEstimatedBytes(tableAssignments); // only add assignments together if they don't exceed max bytes per schedule. 
if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule) { currentAssignments.addAll(tableAssignments); @@ -228,7 +231,7 @@ protected List filterRepairAssignments(AutoRepairConfig.Repair reorderByPriority(repairAssignments, repairType); // Confine repair assignemnts by maxBytesPer Schedule if greater than maxBytesPerSchedule. - long assignmentBytes = getBytesInAssignments(repairAssignments); + long assignmentBytes = getEstimatedBytes(repairAssignments); if (assignmentBytes < maxBytesPerSchedule) return repairAssignments.stream().map((a) -> (RepairAssignment)a).collect(Collectors.toList()); else @@ -237,7 +240,7 @@ protected List filterRepairAssignments(AutoRepairConfig.Repair List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); for (SizedRepairAssignment repairAssignment : repairAssignments) { // skip any repair assignments that would accumulate us past the maxBytesPerSchedule - if (bytesSoFar + repairAssignment.getEstimatedSizeInBytes() > maxBytesPerSchedule) + if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) { // log that repair assignment was skipped. warnMaxBytesPerSchedule(repairType, repairAssignment); @@ -245,18 +248,25 @@ protected List filterRepairAssignments(AutoRepairConfig.Repair else { assignmentsToReturn.add(repairAssignment); - bytesSoFar += repairAssignment.getEstimatedSizeInBytes(); + bytesSoFar += repairAssignment.getEstimatedBytes(); } } return assignmentsToReturn; } } + /** + * @return The sum of {@link SizedRepairAssignment#getEstimatedBytes()} of all given + * repairAssignments. 
+ * @param repairAssignments The assignments to sum + */ @VisibleForTesting - protected static long getBytesInAssignments(List repairAssignments) { + protected static long getEstimatedBytes(List repairAssignments) + { return repairAssignments .stream() - .mapToLong(SizedRepairAssignment::getEstimatedSizeInBytes).sum(); + .mapToLong(SizedRepairAssignment::getEstimatedBytes) + .sum(); } @VisibleForTesting @@ -280,7 +290,7 @@ static SizedRepairAssignment merge(List assignments) mergedTableNames.addAll(assignment.getTableNames()); } - long sizeForAssignment = getBytesInAssignments(assignments); + long sizeForAssignment = getEstimatedBytes(assignments); return new SizedRepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames), sizeForAssignment); } @@ -353,12 +363,20 @@ protected List getRepairAssignments(List si private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, SizedRepairAssignment repairAssignment) { - String warning = "Refusing to add repair assignment of size {} for {}.{} because it would increase total repair bytes to a value greater than {}"; + String warning = "Refusing to add repair assignment of size {} for {}.{} because it would increase total repair bytes to a value greater than {} ({})"; if (repairType == AutoRepairConfig.RepairType.FULL) { warning = warning + ", everything will not be repaired this schedule. 
Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds"; } - logger.warn(warning, repairAssignment.getEstimatedSizeInBytes(), repairAssignment.keyspaceName, repairAssignment.tableNames, FileUtils.stringifyFileSize(maxBytesPerSchedule)); + // TODO: Add two metrics: + // Meter: RepairRangeSplitter.RepairType.SkippedAssignments + // Meter: RepairRangeSplitter.RepairType.SkippedBytes + logger.warn(warning, + repairAssignment.getEstimatedBytes(), + repairAssignment.keyspaceName, + repairAssignment.tableNames, + FileUtils.stringifyFileSize(maxBytesPerSchedule), + FileUtils.stringifyFileSize(maxBytesPerSchedule + repairAssignment.getEstimatedBytes())); } private int calculateNumberOfSplits(SizeEstimate estimate) @@ -387,7 +405,7 @@ private int calculateNumberOfSplits(SizeEstimate estimate) return splits; } - public Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) + private Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) { // Collect all applicable token ranges Collection> wrappedRanges; @@ -536,19 +554,28 @@ public SizeEstimate(AutoRepairConfig.RepairType repairType, } } + /** + * Implementation of RepairAssignment that also assigns an estimation of bytes involved + * in the repair. + */ @VisibleForTesting protected static class SizedRepairAssignment extends RepairAssignment { - final long estimatedSizedInBytes; + final long estimatedBytes; - public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, long estimatedSizeInBytes) + public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, long estimatedBytes) { super(tokenRange, keyspaceName, tableNames); - this.estimatedSizedInBytes = estimatedSizeInBytes; + this.estimatedBytes = estimatedBytes; } - public long getEstimatedSizeInBytes() { - return estimatedSizedInBytes; + /** + * Estimated bytes involved in the assignment. 
Typically Derived from {@link SizeEstimate#sizeForRepair}. + * @return estimated bytes involved in the assignment. + */ + public long getEstimatedBytes() + { + return estimatedBytes; } @Override @@ -558,25 +585,24 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; SizedRepairAssignment that = (SizedRepairAssignment) o; - return estimatedSizedInBytes == that.estimatedSizedInBytes; + return estimatedBytes == that.estimatedBytes; } @Override public int hashCode() { - return Objects.hash(super.hashCode(), estimatedSizedInBytes); + return Objects.hash(super.hashCode(), estimatedBytes); } @Override public String toString() { return "SizedRepairAssignement{" + - "estimatedSizedInBytes=" + FileUtils.stringifyFileSize(estimatedSizedInBytes) + + "estimatedSizedInBytes=" + FileUtils.stringifyFileSize(estimatedBytes) + ", keyspaceName='" + keyspaceName + '\'' + ", tokenRange=" + tokenRange + ", tableNames=" + tableNames + '}'; } - } } From a2cb72d124c7f73330bafb85286937fa86183f5e Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 27 Nov 2024 09:12:58 -0600 Subject: [PATCH 079/257] Handle EmpySSTableScanner being returned from getScanner If no SSTables cover a requested range an EmptySSTableScanner will be returned, handle this. 
--- .../autorepair/RepairRangeSplitter.java | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 510edc36f719..3af982214e5b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -46,6 +46,7 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.io.sstable.format.big.BigTableScanner; @@ -317,13 +318,13 @@ protected List getRepairAssignments(List si long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); if (memtableSize > 0L) { - logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); + logger.info("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), estimate.sizeInRange); repairAssignments.add(assignment); } else { - logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", estimate.keyspace, estimate.table, estimate.tokenRange); + logger.info("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", estimate.keyspace, estimate.table, estimate.tokenRange); } } else @@ -466,22 
+467,30 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai // still better to use the cardinality estimator then the index since it wont count duplicates. // get the bounds of the sstable for this range using the index file but do not actually read it. List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, Collections.singleton(tokenRange))) - { - assert bounds.size() == 1; - AbstractBounds bound = bounds.get(0); - long startPosition = scanner.getDataPosition(bound.left); - long endPosition = scanner.getDataPosition(bound.right); - // If end position is 0 we can assume the sstable ended before that token, bound at size of file - if (endPosition == 0) + ISSTableScanner rangeScanner = BigTableScanner.getScanner((BigTableReader) reader, Collections.singleton(tokenRange)); + // Type check scanner returned as it may be an EmptySSTableScanner if the range is not covered in the + // SSTable, in this case we will avoid incrementing approxBytesInRange. 
+ if (rangeScanner instanceof BigTableScanner) + { + try (BigTableScanner scanner = (BigTableScanner) rangeScanner) { - endPosition = sstableSize; + assert bounds.size() == 1; + + AbstractBounds bound = bounds.get(0); + long startPosition = scanner.getDataPosition(bound.left); + long endPosition = scanner.getDataPosition(bound.right); + // If end position is 0 we can assume the sstable ended before that token, bound at size of file + if (endPosition == 0) + { + endPosition = sstableSize; + } + + long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); + approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); } - - long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); - approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); } + } catch (IOException | CardinalityMergeException e) { From 5ee5e666b213c4ef1fef17b0d7953afdab1d4b92 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 27 Nov 2024 11:47:16 -0600 Subject: [PATCH 080/257] Minor refactoring around bytesPerSubrange, logging Avoid intermediate variable by making DEFAULT_SUBRANGE_SIZE a long. 
Log bytes using stringifyFileSize --- .../repair/autorepair/RepairRangeSplitter.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 3af982214e5b..82c66d4bb6aa 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -96,23 +96,21 @@ public class RepairRangeSplitter implements IAutoRepairTokenRangeSplitter private final long bytesPerSubrange; private final long partitionsPerSubrange; - private static final DataStorageSpec.LongBytesBound DEFAULT_SUBRANGE_SIZE = new DataStorageSpec.LongBytesBound("100GiB"); + private static final long DEFAULT_SUBRANGE_SIZE = new DataStorageSpec.LongBytesBound("100GiB").toBytes(); private static final long DEFAULT_MAX_BYTES_PER_SCHEDULE = Long.MAX_VALUE; private static final long DEFAULT_PARTITION_LIMIT = (long) Math.pow(2, DatabaseDescriptor.getRepairSessionMaxTreeDepth()); private static final int DEFAULT_TABLE_BATCH_LIMIT = 64; public RepairRangeSplitter(Map parameters) { - DataStorageSpec.LongBytesBound subrangeSize; if (parameters.containsKey(SUBRANGE_SIZE)) { - subrangeSize = new DataStorageSpec.LongBytesBound(parameters.get(SUBRANGE_SIZE)); + bytesPerSubrange = new DataStorageSpec.LongBytesBound(parameters.get(SUBRANGE_SIZE)).toBytes(); } else { - subrangeSize = DEFAULT_SUBRANGE_SIZE; + bytesPerSubrange = DEFAULT_SUBRANGE_SIZE; } - bytesPerSubrange = subrangeSize.toBytes(); if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) { @@ -151,7 +149,8 @@ public RepairRangeSplitter(Map parameters) } logger.info("Configured {} with {}={}, {}={}, {}={}, {}={}", RepairRangeSplitter.class.getName(), - SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule, + SUBRANGE_SIZE, FileUtils.stringifyFileSize(bytesPerSubrange), + 
MAX_BYTES_PER_SCHEDULE, FileUtils.stringifyFileSize(maxBytesPerSchedule), PARTITION_COUNT, partitionsPerSubrange, TABLE_BATCH_LIMIT, tablesPerAssignmentLimit); } @@ -373,7 +372,7 @@ private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, Siz // Meter: RepairRangeSplitter.RepairType.SkippedAssignments // Meter: RepairRangeSplitter.RepairType.SkippedBytes logger.warn(warning, - repairAssignment.getEstimatedBytes(), + FileUtils.stringifyFileSize(repairAssignment.getEstimatedBytes()), repairAssignment.keyspaceName, repairAssignment.tableNames, FileUtils.stringifyFileSize(maxBytesPerSchedule), From 4f35fe2f03be75c3c26afe21d54fd56d3ae94a21 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:47:27 -0600 Subject: [PATCH 081/257] Cleanup logging of repair assignments --- .../autorepair/RepairRangeSplitter.java | 163 ++++++++++++------ .../autorepair/RepairRangeSplitterTest.java | 18 +- 2 files changed, 124 insertions(+), 57 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 82c66d4bb6aa..93e7431a15c1 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -212,10 +212,11 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy repairAssignments.addAll(tableAssignments); } } + if (!currentAssignments.isEmpty()) repairAssignments.add(merge(currentAssignments)); - return filterRepairAssignments(repairType, repairAssignments); + return filterRepairAssignments(repairType, keyspaceName, repairAssignments); } /** @@ -225,34 +226,74 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy * @param repairAssignments the assignments to filter. * @return A list of repair assignments ordered by priority and confined by maxBytesPerSchedule. 
*/ - protected List filterRepairAssignments(AutoRepairConfig.RepairType repairType, List repairAssignments) + protected List filterRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List repairAssignments) { + if (repairAssignments.isEmpty()) + return Collections.emptyList(); + // Reorder the repair assignments reorderByPriority(repairAssignments, repairType); - // Confine repair assignemnts by maxBytesPer Schedule if greater than maxBytesPerSchedule. - long assignmentBytes = getEstimatedBytes(repairAssignments); - if (assignmentBytes < maxBytesPerSchedule) - return repairAssignments.stream().map((a) -> (RepairAssignment)a).collect(Collectors.toList()); - else + // Confine repair assignments by maxBytesPerSchedule. + long bytesSoFar = 0L; + boolean isIncremental = false; + long bytesNotRepaired = 0L; + int assignmentsNotRepaired = 0; + List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); + for (SizedRepairAssignment repairAssignment : repairAssignments) { - long bytesSoFar = 0L; - List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); - for (SizedRepairAssignment repairAssignment : repairAssignments) { - // skip any repair assignments that would accumulate us past the maxBytesPerSchedule - if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) - { - // log that repair assignment was skipped. - warnMaxBytesPerSchedule(repairType, repairAssignment); - } - else - { - assignmentsToReturn.add(repairAssignment); - bytesSoFar += repairAssignment.getEstimatedBytes(); - } + // skip any repair assignments that would accumulate us past the maxBytesPerSchedule + if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) + { + // log that repair assignment was skipped. 
+ bytesNotRepaired += repairAssignment.getEstimatedBytes(); + assignmentsNotRepaired++; + logger.warn("Skipping {} because it would increase total repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar + repairAssignment.getEstimatedBytes())); + } + else + { + bytesSoFar += repairAssignment.getEstimatedBytes(); + logger.info("Adding {}, increasing repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar)); + assignmentsToReturn.add(repairAssignment); } - return assignmentsToReturn; } + + String message = "Returning {} assignment(s) for {}, totaling {}"; + if (assignmentsNotRepaired != 0) + { + message += ". Skipping {} of {} assignment(s), totaling {}"; + if (repairType != AutoRepairConfig.RepairType.INCREMENTAL) + { + message += ". The entire primary range will not be repaired this schedule. " + + "Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure " + + "all ranges do get repaired within gc_grace_seconds"; + logger.warn(message, assignmentsToReturn.size(), keyspaceName, getBytesOfMaxBytesPerSchedule(bytesSoFar), + assignmentsNotRepaired, repairAssignments.size(), + FileUtils.stringifyFileSize(bytesNotRepaired)); + } + else + { + logger.info(message, assignmentsToReturn.size(), keyspaceName, getBytesOfMaxBytesPerSchedule(bytesSoFar), + assignmentsNotRepaired, repairAssignments.size(), + FileUtils.stringifyFileSize(bytesNotRepaired)); + } + } + else + { + logger.info(message, assignmentsToReturn.size(), keyspaceName, getBytesOfMaxBytesPerSchedule(bytesSoFar)); + } + return assignmentsToReturn; + } + + private String getBytesOfMaxBytesPerSchedule(long bytes) { + if (maxBytesPerSchedule == Long.MAX_VALUE) + return FileUtils.stringifyFileSize(bytes); + else + return String.format("%s of %s", FileUtils.stringifyFileSize(bytes), FileUtils.stringifyFileSize(maxBytesPerSchedule)); } /** @@ -291,7 +332,8 @@ static SizedRepairAssignment merge(List assignments) } long 
sizeForAssignment = getEstimatedBytes(assignments); - return new SizedRepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames), sizeForAssignment); + return new SizedRepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames), + "full primary range for " + mergedTableNames.size() + " tables", sizeForAssignment); } @VisibleForTesting @@ -317,20 +359,19 @@ protected List getRepairAssignments(List si long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); if (memtableSize > 0L) { - logger.info("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); - SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), estimate.sizeInRange); + logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "memtable only", estimate.sizeInRange); repairAssignments.add(assignment); } else { - logger.info("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", estimate.keyspace, estimate.table, estimate.tokenRange); + logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", estimate.keyspace, estimate.table, estimate.tokenRange); } } else { // Check if the estimate needs splitting based on the criteria boolean needsSplitting = estimate.sizeForRepair > bytesPerSubrange || estimate.partitions > partitionsPerSubrange; - if (needsSplitting) { int numberOfSplits = calculateNumberOfSplits(estimate); @@ -338,22 +379,18 @@ protected 
List getRepairAssignments(List si Collection> subranges = split(estimate.tokenRange, numberOfSplits); for (Range subrange : subranges) { - logger.info("Added repair assignment for {}.{} for subrange {} (#{}/{})", - estimate.keyspace, estimate.table, subrange, repairAssignments.size() + 1, numberOfSplits); - SizedRepairAssignment assignment = new SizedRepairAssignment(subrange, estimate.keyspace, Collections.singletonList(estimate.table), approximateBytesPerSplit); + SizedRepairAssignment assignment = new SizedRepairAssignment(subrange, estimate.keyspace, Collections.singletonList(estimate.table), + String.format("subrange %d of %d", repairAssignments.size()+1, numberOfSplits), + approximateBytesPerSplit); repairAssignments.add(assignment); } } else { - logger.info("Using 1 repair assignment for {}.{} for range {} as rangeBytes={} is less than {}={} and partitionEstimate={} is less than {}={}", - estimate.keyspace, estimate.table, estimate.tokenRange, - FileUtils.stringifyFileSize(estimate.sizeForRepair), - SUBRANGE_SIZE, FileUtils.stringifyFileSize(bytesPerSubrange), - estimate.partitions, - PARTITION_COUNT, partitionsPerSubrange); // No splitting needed, repair the entire range as-is - SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), estimate.sizeForRepair); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, + Collections.singletonList(estimate.table), + "full primary range for table", estimate.sizeForRepair); repairAssignments.add(assignment); } } @@ -363,7 +400,7 @@ protected List getRepairAssignments(List si private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, SizedRepairAssignment repairAssignment) { - String warning = "Refusing to add repair assignment of size {} for {}.{} because it would increase total repair bytes to a value greater than {} ({})"; + String warning = "Refusing to add 
repair assignment {} because it would increase total repair bytes to a value greater than {} ({})"; if (repairType == AutoRepairConfig.RepairType.FULL) { warning = warning + ", everything will not be repaired this schedule. Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds"; @@ -372,9 +409,7 @@ private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, Siz // Meter: RepairRangeSplitter.RepairType.SkippedAssignments // Meter: RepairRangeSplitter.RepairType.SkippedBytes logger.warn(warning, - FileUtils.stringifyFileSize(repairAssignment.getEstimatedBytes()), - repairAssignment.keyspaceName, - repairAssignment.tableNames, + repairAssignment, FileUtils.stringifyFileSize(maxBytesPerSchedule), FileUtils.stringifyFileSize(maxBytesPerSchedule + repairAssignment.getEstimatedBytes())); } @@ -436,7 +471,7 @@ private List getRangeSizeEstimate(AutoRepairConfig.RepairType repa try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) { SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); - logger.debug("Size estimate for {}.{} for range {} is {}", keyspace, table, tokenRange, estimate); + logger.debug("Generated size estimate {}", estimate); sizeEstimates.add(estimate); } } @@ -560,6 +595,21 @@ public SizeEstimate(AutoRepairConfig.RepairType repairType, this.sizeForRepair = repairType == AutoRepairConfig.RepairType.INCREMENTAL ? 
totalSize : sizeInRange; } + + @Override + public String toString() + { + return "SizeEstimate{" + + "repairType=" + repairType + + ", keyspace='" + keyspace + '\'' + + ", table='" + table + '\'' + + ", tokenRange=" + tokenRange + + ", partitions=" + partitions + + ", sizeInRange=" + sizeInRange + + ", totalSize=" + totalSize + + ", sizeForRepair=" + sizeForRepair + + '}'; + } } /** @@ -569,14 +619,30 @@ public SizeEstimate(AutoRepairConfig.RepairType repairType, @VisibleForTesting protected static class SizedRepairAssignment extends RepairAssignment { + final String description; final long estimatedBytes; - public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, long estimatedBytes) + public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames) + { + this(tokenRange, keyspaceName, tableNames, "", 0L); + } + + public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, + String description, + long estimatedBytes) { super(tokenRange, keyspaceName, tableNames); + this.description = description; this.estimatedBytes = estimatedBytes; } + /** + * @return Additional metadata about the repair assignment. + */ + public String getDescription() { + return description; + } + /** * Estimated bytes involved in the assignment. Typically Derived from {@link SizeEstimate#sizeForRepair}. * @return estimated bytes involved in the assignment. 
@@ -593,23 +659,24 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; SizedRepairAssignment that = (SizedRepairAssignment) o; - return estimatedBytes == that.estimatedBytes; + return estimatedBytes == that.estimatedBytes && Objects.equals(description, that.description); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), estimatedBytes); + return Objects.hash(super.hashCode(), description, estimatedBytes); } @Override public String toString() { - return "SizedRepairAssignement{" + - "estimatedSizedInBytes=" + FileUtils.stringifyFileSize(estimatedBytes) + - ", keyspaceName='" + keyspaceName + '\'' + + return "SizedRepairAssignment{" + + "description='" + description + '\'' + ", tokenRange=" + tokenRange + + ", keyspaceName='" + keyspaceName + '\'' + ", tableNames=" + tableNames + + ", estimatedBytes=" + FileUtils.stringifyFileSize(estimatedBytes) + '}'; } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 2873b2566b0f..6c91fbd539be 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -261,7 +261,7 @@ public void testMergeSingleAssignment() { String keyspaceName = "testKeyspace"; List tableNames = Arrays.asList("table1", "table2"); - SizedRepairAssignment assignment = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames, 0L); + SizedRepairAssignment assignment = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames); List assignments = Collections.singletonList(assignment); SizedRepairAssignment result = RepairRangeSplitter.merge(assignments); @@ -278,8 +278,8 @@ public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { List tableNames1 = Arrays.asList("table1", "table2"); List 
tableNames2 = Arrays.asList("table2", "table3"); - SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1, 0L); - SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2, 0L); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2); List assignments = Arrays.asList(assignment1, assignment2); SizedRepairAssignment result = RepairRangeSplitter.merge(assignments); @@ -300,8 +300,8 @@ public void testMergeDifferentTokenRange() { String keyspaceName = "testKeyspace"; List tableNames = Arrays.asList("table1", "table2"); - SizedRepairAssignment assignment1 = new SizedRepairAssignment(tokenRange1, keyspaceName, tableNames, 0L); - SizedRepairAssignment assignment2 = new SizedRepairAssignment(tokenRange2, keyspaceName, tableNames, 0L); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(tokenRange1, keyspaceName, tableNames); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(tokenRange2, keyspaceName, tableNames); List assignments = Arrays.asList(assignment1, assignment2); RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException @@ -312,8 +312,8 @@ public void testMergeDifferentKeyspaceName() { // Test merging assignments with different keyspace names List tableNames = Arrays.asList("table1", "table2"); - SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, "keyspace1", tableNames, 0L); - SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, "keyspace2", tableNames, 0L); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, "keyspace1", tableNames); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, "keyspace2", tableNames); List assignments = Arrays.asList(assignment1, assignment2); 
RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException @@ -326,8 +326,8 @@ public void testMergeWithDuplicateTables() { List tableNames1 = Arrays.asList("table1", "table2"); List tableNames2 = Arrays.asList("table2", "table3"); - SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1, 0L); - SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2, 0L); + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2); List assignments = Arrays.asList(assignment1, assignment2); RepairAssignment result = RepairRangeSplitter.merge(assignments); From 56c686c9a97a22e21950cb5e58cfba8638796bd2 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 30 Nov 2024 12:04:09 -0600 Subject: [PATCH 082/257] Remove UnrepairedBytesBasedTokenRangeSplitter --- ...nrepairedBytesBasedTokenRangeSplitter.java | 337 ------------------ 1 file changed, 337 deletions(-) delete mode 100644 src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java deleted file mode 100644 index 91d5783abe8e..000000000000 --- a/src/java/org/apache/cassandra/repair/autorepair/UnrepairedBytesBasedTokenRangeSplitter.java +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.repair.autorepair; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.cassandra.config.DataStorageSpec; -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.PartitionPosition; -import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; -import org.apache.cassandra.db.lifecycle.SSTableSet; -import org.apache.cassandra.db.lifecycle.View; -import org.apache.cassandra.dht.AbstractBounds; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.io.sstable.format.big.BigTableReader; -import org.apache.cassandra.io.sstable.format.big.BigTableScanner; -import org.apache.cassandra.io.util.FileUtils; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.utils.concurrent.Refs; - -import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; - -public class UnrepairedBytesBasedTokenRangeSplitter implements IAutoRepairTokenRangeSplitter -{ - private static final Logger logger = LoggerFactory.getLogger(UnrepairedBytesBasedTokenRangeSplitter.class); - - static final String SUBRANGE_SIZE = 
"subrange_size"; - static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; - - // target bytes per subrange - private final DataStorageSpec.LongBytesBound subrangeSize; - - // maximum target bytes to repair - private final DataStorageSpec.LongBytesBound maxBytesPerSchedule; - - private final long subrangeBytes; - - private final long maxBytesPerScheduleBytes; - - private static final DataStorageSpec.LongBytesBound DEFAULT_SUBRANGE_SIZE = new DataStorageSpec.LongBytesBound("100GiB"); - private static final DataStorageSpec.LongBytesBound DEFAULT_MAX_BYTES_PER_SCHEDULE = new DataStorageSpec.LongBytesBound("500GiB"); - - public UnrepairedBytesBasedTokenRangeSplitter(Map parameters) - { - // Demonstrates parameterizing a range splitter so we can have splitter specific options. - if (parameters.containsKey(SUBRANGE_SIZE)) - { - subrangeSize = new DataStorageSpec.LongBytesBound(parameters.get(SUBRANGE_SIZE)); - } - else - { - subrangeSize = DEFAULT_SUBRANGE_SIZE; - } - subrangeBytes = subrangeSize.toBytes(); - - if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) - { - maxBytesPerSchedule = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)); - } - else - { - maxBytesPerSchedule = DEFAULT_MAX_BYTES_PER_SCHEDULE; - } - maxBytesPerScheduleBytes = maxBytesPerSchedule.toBytes(); - - logger.info("Configured {} with {}={}, {}={}", UnrepairedBytesBasedTokenRangeSplitter.class.getName(), - SUBRANGE_SIZE, subrangeSize, MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule); - } - - @Override - public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) - { - List repairAssignments = new ArrayList<>(); - - logger.info("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspaceName, tableNames); - if (repairType != AutoRepairConfig.RepairType.INCREMENTAL) - { - throw new 
IllegalArgumentException(this.getClass().getName() + " only supports " + AutoRepairConfig.RepairType.INCREMENTAL + " repair"); - } - - // TODO: create a custom repair assignment that indicates number of bytes in repair and join tables by byte size. - Collection> tokenRanges = getTokenRanges(primaryRangeOnly, keyspaceName); - for (String tableName : tableNames) - { - repairAssignments.addAll(getRepairAssignmentsForTable(keyspaceName, tableName, tokenRanges)); - } - return repairAssignments; - } - - public List getRepairAssignmentsForTable(String keyspaceName, String tableName, Collection> tokenRanges) - { - List repairAssignments = new ArrayList<>(); - - long targetBytesSoFar = 0; - - for (Range tokenRange : tokenRanges) - { - logger.info("Calculating unrepaired bytes for {}.{} for range {}", keyspaceName, tableName, tokenRange); - // Capture the amount of unrepaired bytes for range - long approximateUnrepairedBytesForRange = 0L; - // Capture the total bytes in read sstables, this will be useful for calculating the ratio - // of data in SSTables including this range and also useful to know how much anticompaction there will be. - long totalBytesInUnrepairedSSTables = 0L; - try (Refs refs = getSSTableReaderRefs(keyspaceName, tableName, tokenRange)) - { - for (SSTableReader reader : refs) - { - // Only evaluate unrepaired SSTables. - if (!reader.isRepaired()) - { - long sstableSize = reader.bytesOnDisk(); - totalBytesInUnrepairedSSTables += sstableSize; - // get the bounds of the sstable for this range using the index file but do not actually read it. 
- List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, Collections.singleton(tokenRange))) - { - assert bounds.size() == 1; - - AbstractBounds bound = bounds.get(0); - long startPosition = scanner.getDataPosition(bound.left); - long endPosition = scanner.getDataPosition(bound.right); - // If end position is 0 we can assume the sstable ended before that token, bound at size of file - if (endPosition == 0) - { - endPosition = sstableSize; - } - - long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); - // get the fraction of the sstable belonging to the range. - approximateUnrepairedBytesForRange += Math.min(approximateRangeBytesInSSTable, sstableSize); - double ratio = approximateRangeBytesInSSTable / (double) sstableSize; - logger.info("Calculations for {}.{} {}: sstableSize={}, rangeBytesInSSTable={}, startPosition={}, endPosition={}, ratio={}", - keyspaceName, tableName, reader.descriptor.baseFile().name(), - FileUtils.stringifyFileSize(sstableSize), FileUtils.stringifyFileSize(approximateRangeBytesInSSTable), startPosition, endPosition, ratio); - } - } - else - { - logger.info("Skipping over {}.{} {} ({}) because it is repaired", keyspaceName, tableName, reader.descriptor.baseFile().name(), FileUtils.stringifyFileSize(reader.bytesOnDisk())); - } - } - } - - // Only consider token range if it had unrepaired sstables or live data in memtables. - if (totalBytesInUnrepairedSSTables > 0L) - { - // TODO: Possibly some anticompaction configuration we want here, where if we detect a large amount of anticompaction we want to reduce the work we do. 
- double ratio = approximateUnrepairedBytesForRange / (double) totalBytesInUnrepairedSSTables; - logger.info("Calculated unrepaired bytes for {}.{} for range {}: sstableSize={}, rangeBytesInSSTables={}, ratio={}", keyspaceName, tableName, tokenRange, - FileUtils.stringifyFileSize(totalBytesInUnrepairedSSTables), FileUtils.stringifyFileSize(approximateUnrepairedBytesForRange), ratio); - - // TODO: split on byte size here, this is currently a bit naive in assuming that data is evenly distributed among the range which may not be the - // right assumption. May want to consider when splitting on these ranges to reevaluate how much data is in the range, but for this - // exists as a demonstration. - if (approximateUnrepairedBytesForRange < subrangeBytes) - { - // accept range as is if less than bytes. - logger.info("Using 1 repair assignment for {}.{} for range {} as {} is less than {}", keyspaceName, tableName, tokenRange, - FileUtils.stringifyFileSize(approximateUnrepairedBytesForRange), subrangeSize); - // TODO: this is a bit repetitive see if can reduce more. - RepairAssignment assignment = new BytesBasedRepairAssignment(tokenRange, keyspaceName, Collections.singletonList(tableName), approximateUnrepairedBytesForRange); - if (canAddAssignment(assignment, targetBytesSoFar, approximateUnrepairedBytesForRange)) - { - repairAssignments.add(assignment); - targetBytesSoFar += approximateUnrepairedBytesForRange; - } - else - return repairAssignments; - } - else - { - long targetRanges = approximateUnrepairedBytesForRange / subrangeBytes; - // TODO: approximation per range, this is a bit lossy since targetRanges rounds down. 
- long approximateBytesPerSplit = approximateUnrepairedBytesForRange / targetRanges; - logger.info("Splitting {}.{} for range {} into {} sub ranges, approximateBytesPerSplit={}", keyspaceName, tableName, tokenRange, targetRanges, FileUtils.stringifyFileSize(approximateBytesPerSplit)); - Collection> splitRanges = split(tokenRange, (int) targetRanges); - int splitRangeCount = 0; - for (Range splitRange : splitRanges) - { - RepairAssignment assignment = new BytesBasedRepairAssignment(splitRange, keyspaceName, Collections.singletonList(tableName), approximateBytesPerSplit); - if (canAddAssignment(assignment, targetBytesSoFar, approximateBytesPerSplit)) - { - logger.info("Added repair assignment for {}.{} for subrange {} (#{}/{}) with approximateBytes={}", - keyspaceName, tableName, splitRange, ++splitRangeCount, splitRanges.size(), FileUtils.stringifyFileSize(approximateBytesPerSplit)); - repairAssignments.add(assignment); - targetBytesSoFar += approximateBytesPerSplit; - } - else - return repairAssignments; - } - } - } - else - { - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); - long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); - if (memtableSize > 0L) - { - logger.info("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", keyspaceName, tableName, tokenRange, memtableSize); - RepairAssignment assignment = new BytesBasedRepairAssignment(tokenRange, keyspaceName, Collections.singletonList(tableName), memtableSize); - if (targetBytesSoFar >= maxBytesPerScheduleBytes) - { - return repairAssignments; - } - repairAssignments.add(assignment); - targetBytesSoFar += memtableSize; - } - else - { - logger.info("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", keyspaceName, tableName, tokenRange); - } - } - } - return repairAssignments; - } - - private boolean canAddAssignment(RepairAssignment repairAssignment, long 
targetBytesSoFar, long bytesToBeAdded) - { - if (targetBytesSoFar + bytesToBeAdded < maxBytesPerScheduleBytes) - { - return true; - } - logger.warn("Refusing to add {} with a target size of {} because it would increase total repair bytes to {} which is greater than {}={}", - repairAssignment, FileUtils.stringifyFileSize(bytesToBeAdded), FileUtils.stringifyFileSize(targetBytesSoFar + bytesToBeAdded), MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule); - return false; - } - - public Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) - { - // Collect all applicable token ranges - Collection> wrappedRanges; - if (primaryRangeOnly) - { - wrappedRanges = StorageService.instance.getPrimaryRanges(keyspaceName); - } - else - { - wrappedRanges = StorageService.instance.getLocalRanges(keyspaceName); - } - - // Unwrap each range as we need to account for ranges that overlap the ring - Collection> ranges = new ArrayList<>(); - for (Range wrappedRange : wrappedRanges) - { - ranges.addAll(wrappedRange.unwrap()); - } - - return ranges; - } - - public Refs getSSTableReaderRefs(String keyspaceName, String tableName, Range tokenRange) - { - final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); - - if (cfs == null) - { - throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, tableName)); - } - - Iterable sstables = cfs.getTracker().getView().select(SSTableSet.CANONICAL); - SSTableIntervalTree tree = SSTableIntervalTree.build(sstables); - Range r = Range.makeRowRange(tokenRange); - Iterable canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); - - // TODO: may need to reason about this not working. 
- return Refs.ref(canonicalSSTables); - } - - public static class BytesBasedRepairAssignment extends RepairAssignment - { - private final long approximateBytes; - - public BytesBasedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, long approximateBytes) - { - super(tokenRange, keyspaceName, tableNames); - this.approximateBytes = approximateBytes; - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; - BytesBasedRepairAssignment that = (BytesBasedRepairAssignment) o; - return approximateBytes == that.approximateBytes; - } - - @Override - public int hashCode() - { - return Objects.hash(super.hashCode(), approximateBytes); - } - - @Override - public String toString() - { - return "BytesBasedRepairAssignment{" + - "keyspaceName='" + keyspaceName + '\'' + - ", approximateBytes=" + approximateBytes + - ", tokenRange=" + tokenRange + - ", tableNames=" + tableNames + - '}'; - } - - public long getApproximateBytes() - { - return approximateBytes; - } - } -} From 053d3ac9673fc6e00bd157d5c86f1a1ca1bd6964 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 30 Nov 2024 15:10:02 -0600 Subject: [PATCH 083/257] Refactor RepairRangeSplitter interface to accept all keyspaces This enables us to evaluate all RepairAssignments in the schedule at once which enhances the ability to order and restrict the RepairAssignments returned based on the splitter implementation. For example, RepairRangeSplitter's max_bytes_per_schedule would be more useful if it was evaluated for all keyspaces collectively instead of per keyspace. Also refactors AutoRepair to order keyspaces by highest table priority. This may be enhanced later to bucket repair submissions by priority instead of ordering keyspaces by highest table priority. 
--- .../repair/autorepair/AutoRepair.java | 54 ++++++- .../DefaultAutoRepairTokenSplitter.java | 15 +- .../IAutoRepairTokenRangeSplitter.java | 6 +- .../autorepair/RepairRangeSplitter.java | 150 ++++++++++-------- ...DefaultTokenSplitterParameterizedTest.java | 76 +++++---- .../AutoRepairParameterizedTest.java | 15 +- .../autorepair/RepairRangeSplitterTest.java | 12 +- 7 files changed, 214 insertions(+), 114 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1abb71d69cac..2ec92276ec2f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -22,6 +22,7 @@ import java.util.EnumMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -147,6 +148,37 @@ public void repairAsync(AutoRepairConfig.RepairType repairType) repairExecutors.get(repairType).submit(() -> repair(repairType)); } + /** + * @return The priority of the given table if defined, otherwise 0. + */ + private int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + return cfs != null ? cfs.metadata().params.automatedRepair.get(repairType).priority() : 0; + } + + /** + * @return A new map of keyspaces to tables sorted by the keyspace's underlying highest table priority. + */ + private Map> sortKeyspaceMapByTablePriority(AutoRepairConfig.RepairType repairType, Map> keyspacesAndTablesToRepair) + { + // Sort assignments by keyspace with tables with the highest priority. 
+ List>> entriesOrderedByTablePriority = new ArrayList<>(keyspacesAndTablesToRepair.entrySet()); + entriesOrderedByTablePriority.sort((a, b) -> { + int aMaxPriority = a.getValue().stream().mapToInt((tableName) -> getPriority(repairType, a.getKey(), tableName)).max().orElse(0); + int bMaxPriority = b.getValue().stream().mapToInt((tableName) -> getPriority(repairType, b.getKey(), tableName)).max().orElse(0); + return bMaxPriority - aMaxPriority; + }); + + Map> sortedKeyspacesToRepair = new LinkedHashMap<>(); + for (Map.Entry> sortedEntries : entriesOrderedByTablePriority) + { + sortedKeyspacesToRepair.put(sortedEntries.getKey(), sortedEntries.getValue()); + } + + return sortedKeyspacesToRepair; + } + // repair runs a repair session of the given type synchronously. public void repair(AutoRepairConfig.RepairType repairType) { @@ -205,18 +237,30 @@ public void repair(AutoRepairConfig.RepairType repairType) // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. shuffleFunc.accept(keyspaces); + // Filter out keyspaces and tables to repair and group into a map by keyspace. 
+ Map> keyspacesAndTablesToRepair = new LinkedHashMap<>(); for (Keyspace keyspace : keyspaces) { if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) { continue; } - - repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); List tablesToBeRepairedList = retrieveTablesToBeRepaired(keyspace, config, repairType, repairState, collectectedRepairStats); shuffleFunc.accept(tablesToBeRepairedList); - String keyspaceName = keyspace.getName(); - List repairAssignments = tokenRangeSplitters.get(repairType).getRepairAssignments(repairType, primaryRangeOnly, keyspaceName, tablesToBeRepairedList); + keyspacesAndTablesToRepair.put(keyspace.getName(), tablesToBeRepairedList); + } + // sort keyspaces to repair by table priority + Map> sortedKeyspacesToRepair = sortKeyspaceMapByTablePriority(repairType, keyspacesAndTablesToRepair); + + // calculate the repair assignments for each kesypace. + Map> repairAssignmentsByKeyspace = tokenRangeSplitters.get(repairType).getRepairAssignments(repairType, primaryRangeOnly, sortedKeyspacesToRepair); + + // evaluate over each keyspace's repair assignments. 
+ for (Map.Entry> keyspaceAssignments : repairAssignmentsByKeyspace.entrySet()) + { + String keyspaceName = keyspaceAssignments.getKey(); + List repairAssignments = keyspaceAssignments.getValue(); + repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); int totalRepairAssignments = repairAssignments.size(); long keyspaceStartTime = timeFunc.get(); @@ -243,7 +287,7 @@ public void repair(AutoRepairConfig.RepairType repairType) repairState.setRepairInProgress(false); return; } - if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, tablesToBeRepairedList.size())) + if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, sortedKeyspacesToRepair.get(keyspaceName).size())) { collectectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; logger.info("Keyspace took too much time to repair hence skipping it {}", diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index c0ddc6b9aa7e..7a106411373e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -21,7 +21,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import org.apache.cassandra.service.AutoRepairService; @@ -34,7 +36,18 @@ public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter { @Override - public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + public Map> getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, Map> keyspacesAndTablesToRepair) + { + Map> repairAssignmentsByKeyspace = 
new LinkedHashMap<>(); + for (Map.Entry> keyspace : keyspacesAndTablesToRepair.entrySet()) + { + repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignmentsByKeyspace(repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue())); + } + + return repairAssignmentsByKeyspace; + } + + private List getRepairAssignmentsByKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); List repairAssignments = new ArrayList<>(); diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index d43a9f562594..d64916dcbd58 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -28,7 +28,6 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.schema.AutoRepairParams; public interface IAutoRepairTokenRangeSplitter { @@ -38,11 +37,10 @@ public interface IAutoRepairTokenRangeSplitter * The autorepair framework will repair the list of returned subrange in a sequence. * @param repairType The type of repair being executed * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. - * @param keyspaceName The keyspace being repaired - * @param tableNames The tables to repair + * @param keyspacesAndTablesToRepair A map keyed by keyspace name and valued by tables to generate assignments for. * @return repair assignments broken up by range, keyspace and tables. 
*/ - List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames); + Map> getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, Map> keyspacesAndTablesToRepair); /** * Reorders the list of {@link RepairAssignment} objects based on their priority for a given repair type. diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 93e7431a15c1..00a7a9dc9e95 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -53,6 +54,7 @@ import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.concurrent.Refs; @@ -155,28 +157,40 @@ public RepairRangeSplitter(Map parameters) } @Override - public List getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + public Map> getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, Map> keyspacesAndTablesToRepair) { - logger.debug("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspaceName, tableNames); - Collection> tokenRanges = getTokenRanges(primaryRangeOnly, keyspaceName); - return getRepairAssignments(repairType, keyspaceName, tableNames, tokenRanges); + Map> 
repairAssignmentsByKeyspace = new LinkedHashMap<>(); + for (Map.Entry> keyspace : keyspacesAndTablesToRepair.entrySet()) + { + logger.debug("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue()); + Collection> tokenRanges = getTokenRanges(primaryRangeOnly, keyspace.getKey()); + repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignments(repairType, keyspace.getKey(), keyspace.getValue(), tokenRanges)); + } + + return filterAndOrderRepairAssignments(repairType, repairAssignmentsByKeyspace); } @VisibleForTesting - List getRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) + List getRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) { List repairAssignments = new ArrayList<>(); // this is used for batching minimal single assignment tables together List currentAssignments = new ArrayList<>(); - // sort the tables by size so can batch the smallest ones together - tableNames.sort((t1, t2) -> { - ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); - ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); - if (cfs1 == null || cfs2 == null) - throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, t1)); - return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); - }); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + + // If we can repair by keyspace, sort the tables by size so can batch the smallest ones together + boolean repairByKeyspace = config.getRepairByKeyspace(repairType); + if (repairByKeyspace) + { + tableNames.sort((t1, t2) -> { + ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); + ColumnFamilyStore 
cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); + if (cfs1 == null || cfs2 == null) + throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, t1)); + return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); + }); + } for (String tableName : tableNames) { @@ -185,8 +199,13 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy if (tableAssignments.isEmpty()) continue; + // if not repairing by keyspace don't attempt to batch them with others. + if (!repairByKeyspace) + { + repairAssignments.addAll(tableAssignments); + } // If the table assignments are for the same token range, and we have room to add more tables to the current assignment - if (tableAssignments.size() == 1 && + else if (tableAssignments.size() == 1 && currentAssignments.size() < tablesPerAssignmentLimit && (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) { @@ -216,77 +235,92 @@ List getRepairAssignments(AutoRepairConfig.RepairType repairTy if (!currentAssignments.isEmpty()) repairAssignments.add(merge(currentAssignments)); - return filterRepairAssignments(repairType, keyspaceName, repairAssignments); + return repairAssignments; } /** - * Given a repair type and list of sized-based repair assignments order them by priority of the + * Given a repair type and map of sized-based repair assignments by keyspace order them by priority of the * assignments' underlying tables and confine them by maxBytesPerSchedule. * @param repairType used to determine underyling table priorities - * @param repairAssignments the assignments to filter. + * @param repairAssignmentsByKeyspace the assignments to filter. * @return A list of repair assignments ordered by priority and confined by maxBytesPerSchedule. 
*/ - protected List filterRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List repairAssignments) + @VisibleForTesting + Map> filterAndOrderRepairAssignments(AutoRepairConfig.RepairType repairType, Map> repairAssignmentsByKeyspace) { - if (repairAssignments.isEmpty()) - return Collections.emptyList(); - - // Reorder the repair assignments - reorderByPriority(repairAssignments, repairType); - // Confine repair assignments by maxBytesPerSchedule. long bytesSoFar = 0L; - boolean isIncremental = false; long bytesNotRepaired = 0L; int assignmentsNotRepaired = 0; - List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); - for (SizedRepairAssignment repairAssignment : repairAssignments) + int assignmentsToRepair = 0; + int totalAssignments = 0; + + Map> filteredRepairAssignmentsByKeyspace = new LinkedHashMap<>(); + for (Map.Entry> keyspaceAssignments : repairAssignmentsByKeyspace.entrySet()) { - // skip any repair assignments that would accumulate us past the maxBytesPerSchedule - if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) + String keyspace = keyspaceAssignments.getKey(); + List repairAssignments = keyspaceAssignments.getValue(); + + // Reorder the repair assignments + reorderByPriority(repairAssignments, repairType); + + List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); + for (SizedRepairAssignment repairAssignment : repairAssignments) { - // log that repair assignment was skipped. 
- bytesNotRepaired += repairAssignment.getEstimatedBytes(); - assignmentsNotRepaired++; - logger.warn("Skipping {} because it would increase total repair bytes to {}", - repairAssignment, - getBytesOfMaxBytesPerSchedule(bytesSoFar + repairAssignment.getEstimatedBytes())); + totalAssignments++; + // skip any repair assignments that would accumulate us past the maxBytesPerSchedule + if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) + { + // log that repair assignment was skipped. + bytesNotRepaired += repairAssignment.getEstimatedBytes(); + assignmentsNotRepaired++; + logger.warn("Skipping {} because it would increase total repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar + repairAssignment.getEstimatedBytes())); + } + else + { + bytesSoFar += repairAssignment.getEstimatedBytes(); + assignmentsToRepair++; + logger.info("Adding {}, increasing repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar)); + assignmentsToReturn.add(repairAssignment); + } } - else + + if (!assignmentsToReturn.isEmpty()) { - bytesSoFar += repairAssignment.getEstimatedBytes(); - logger.info("Adding {}, increasing repair bytes to {}", - repairAssignment, - getBytesOfMaxBytesPerSchedule(bytesSoFar)); - assignmentsToReturn.add(repairAssignment); + filteredRepairAssignmentsByKeyspace.put(keyspace, assignmentsToReturn); } } - String message = "Returning {} assignment(s) for {}, totaling {}"; + String message = "Returning {} assignment(s), totaling {}"; if (assignmentsNotRepaired != 0) { message += ". Skipping {} of {} assignment(s), totaling {}"; if (repairType != AutoRepairConfig.RepairType.INCREMENTAL) { message += ". The entire primary range will not be repaired this schedule. 
" + - "Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure " + - "all ranges do get repaired within gc_grace_seconds"; - logger.warn(message, assignmentsToReturn.size(), keyspaceName, getBytesOfMaxBytesPerSchedule(bytesSoFar), - assignmentsNotRepaired, repairAssignments.size(), + "Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure " + + "all ranges do get repaired within gc_grace_seconds"; + logger.warn(message, assignmentsToRepair, getBytesOfMaxBytesPerSchedule(bytesSoFar), + assignmentsNotRepaired, totalAssignments, FileUtils.stringifyFileSize(bytesNotRepaired)); } else { - logger.info(message, assignmentsToReturn.size(), keyspaceName, getBytesOfMaxBytesPerSchedule(bytesSoFar), - assignmentsNotRepaired, repairAssignments.size(), + logger.info(message, assignmentsToRepair, getBytesOfMaxBytesPerSchedule(bytesSoFar), + assignmentsNotRepaired, totalAssignments, FileUtils.stringifyFileSize(bytesNotRepaired)); } } else { - logger.info(message, assignmentsToReturn.size(), keyspaceName, getBytesOfMaxBytesPerSchedule(bytesSoFar)); + logger.info(message, assignmentsToRepair, getBytesOfMaxBytesPerSchedule(bytesSoFar)); } - return assignmentsToReturn; + + return filteredRepairAssignmentsByKeyspace; } private String getBytesOfMaxBytesPerSchedule(long bytes) { @@ -398,22 +432,6 @@ protected List getRepairAssignments(List si return repairAssignments; } - private void warnMaxBytesPerSchedule(AutoRepairConfig.RepairType repairType, SizedRepairAssignment repairAssignment) - { - String warning = "Refusing to add repair assignment {} because it would increase total repair bytes to a value greater than {} ({})"; - if (repairType == AutoRepairConfig.RepairType.FULL) - { - warning = warning + ", everything will not be repaired this schedule. 
Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure all ranges do get repaired within gc_grace_seconds"; - } - // TODO: Add two metrics: - // Meter: RepairRangeSplitter.RepairType.SkippedAssignments - // Meter: RepairRangeSplitter.RepairType.SkippedBytes - logger.warn(warning, - repairAssignment, - FileUtils.stringifyFileSize(maxBytesPerSchedule), - FileUtils.stringifyFileSize(maxBytesPerSchedule + repairAssignment.getEstimatedBytes())); - } - private int calculateNumberOfSplits(SizeEstimate estimate) { // Calculate the number of splits needed for size and partitions diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java index 804d0a712b48..8cee64f5a238 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java @@ -24,6 +24,8 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.LinkedHashMap; +import java.util.Map; import org.junit.BeforeClass; import org.junit.Test; @@ -40,12 +42,12 @@ import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadata; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; @RunWith(Parameterized.class) public class AutoRepairDefaultTokenSplitterParameterizedTest @@ -72,9 +74,9 @@ 
public static void setupClass() throws Exception DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); ServerTestUtils.prepareServerNoRegister(); - Token t1 = new Murmur3Partitioner.LongToken(0); - Token t2 = new Murmur3Partitioner.LongToken(256); - Token t3 = new Murmur3Partitioner.LongToken(1024); + Token t1 = new Murmur3Partitioner.LongToken(-9223372036854775808L); + Token t2 = new Murmur3Partitioner.LongToken(-3074457345618258603L); + Token t3 = new Murmur3Partitioner.LongToken(3074457345618258602L); Set tokens = new HashSet<>(); tokens.add(t1); tokens.add(t2); @@ -89,25 +91,6 @@ public static void setupClass() throws Exception QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); } - private static void appendExpectedTokens(long left, long right, int numberOfSplits, List> expectedToken) - { - long repairTokenWidth = (right - left) / numberOfSplits; - for (int i = 0; i < numberOfSplits; i++) - { - long curLeft = left + (i * repairTokenWidth); - long curRight = curLeft + repairTokenWidth; - if ((i + 1) == numberOfSplits) - { - curRight = right; - } - Token childStartToken = ClusterMetadata.current() - .partitioner.getTokenFactory().fromString("" + curLeft); - Token childEndToken = ClusterMetadata.current() - .partitioner.getTokenFactory().fromString("" + curRight); - expectedToken.add(new Range<>(childStartToken, childEndToken)); - } - } - @Test public void testTokenRangesSplitByTable() { @@ -120,15 +103,24 @@ public void testTokenRangesSplitByTable() List> expectedToken = new ArrayList<>(); for (int i = 0; i < tables.size(); i++) { - appendExpectedTokens(1024, 0, numberOfSplits, expectedToken); - appendExpectedTokens(0, 256, numberOfSplits, expectedToken); - appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } } 
AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); - assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); + Map> keyspaceToTables = new LinkedHashMap<>(); + keyspaceToTables.put(KEYSPACE, tables); + Map> assignmentsByKeyspace = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, keyspaceToTables); + + // should be 1 entry for the keyspace. + assertEquals(1, assignmentsByKeyspace.size()); + List assignments = assignmentsByKeyspace.get(KEYSPACE); + assertNotNull(assignments); + + assertEquals(totalTokenRanges*numberOfSplits*tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); int expectedTableIndex = -1; @@ -138,6 +130,15 @@ public void testTokenRangesSplitByTable() { expectedTableIndex++; } + } + + expectedTableIndex = -1; + for (int i = 0; i tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); - appendExpectedTokens(1024, 0, numberOfSplits, expectedToken); - appendExpectedTokens(0, 256, numberOfSplits, expectedToken); - appendExpectedTokens(256, 1024, numberOfSplits, expectedToken); + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, tables); - assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); + Map> keyspaceToTables = new LinkedHashMap<>(); + keyspaceToTables.put(KEYSPACE, tables); + Map> assignmentsByKeyspace = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, keyspaceToTables); + + // should be 1 entry for 
the keyspace. + assertEquals(1, assignmentsByKeyspace.size()); + List assignments = assignmentsByKeyspace.get(KEYSPACE); + assertNotNull(assignments); + + assertEquals(totalTokenRanges*numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); for (int i = 0; i < totalTokenRanges * numberOfSplits; i++) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index f4bfd04dda6f..6a851f7a87aa 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -23,7 +23,9 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -72,6 +74,7 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; @@ -456,6 +459,7 @@ public void testMetrics() AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMVRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); + config.setRepairRetryBackoff("0s"); config.setAutoRepairTableMaxRepairTime(repairType, "0s"); AutoRepair.timeFunc = () -> { timeFuncCalls++; @@ -544,7 +548,16 @@ public void testTokenRangesNoSplit() assertEquals(1, tokens.size()); List> expectedToken = new ArrayList<>(tokens); - List assignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, KEYSPACE, 
Collections.singletonList(TABLE)); + Map> keyspaceToTables = new LinkedHashMap<>(); + keyspaceToTables.put(KEYSPACE, Collections.singletonList(TABLE)); + Map> assignmentsByKeyspace = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, keyspaceToTables); + + // should be 1 entry for the keyspace. + assertEquals(1, assignmentsByKeyspace.size()); + List assignments = assignmentsByKeyspace.get(KEYSPACE); + assertNotNull(assignments); + + // should be 1 entry for the table which covers the full range. assertEquals(1, assignments.size()); assertEquals(expectedToken.get(0).left, assignments.get(0).getTokenRange().left); assertEquals(expectedToken.get(0).right, assignments.get(0).getTokenRange().right); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 6c91fbd539be..0f18d82eec3f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -40,6 +40,7 @@ import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizedRepairAssignment; +import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.concurrent.Refs; import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; @@ -56,6 +57,9 @@ public class RepairRangeSplitterTest extends CQLTester public static void setUpClass() { CQLTester.setUpClass(); + AutoRepairService.setup(); + // TODO: For now, always repair by keyspace to exercise priority tests + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(AutoRepairConfig.RepairType.FULL, true); FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), 
DatabaseDescriptor.getPartitioner().getMaximumToken()); } @@ -205,7 +209,7 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); // We expect two assignments, one with table1 and table2 batched, and one with table3 assertEquals(2, assignments.size()); @@ -219,7 +223,7 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); // We expect one assignment, with two tables batched assertEquals(1, assignments.size()); @@ -232,7 +236,7 @@ public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); assertEquals(3, assignments.size()); } @@ -243,7 +247,7 @@ public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List 
assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); assertEquals(1, assignments.size()); } From 25ca0774f5624291f74f38c862e702650fc975d6 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 30 Nov 2024 15:17:46 -0600 Subject: [PATCH 084/257] Naming consistency getRepairAssignmentsByKeyspace -> getRepairAssignmentsForKeyspace getRepairAssignments -> getRepairAssignmentsForKeyspace --- .../repair/autorepair/DefaultAutoRepairTokenSplitter.java | 4 ++-- .../cassandra/repair/autorepair/RepairRangeSplitter.java | 4 ++-- .../repair/autorepair/RepairRangeSplitterTest.java | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index 7a106411373e..05e6bafa3173 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -41,13 +41,13 @@ public Map> getRepairAssignments(AutoRepairConfig Map> repairAssignmentsByKeyspace = new LinkedHashMap<>(); for (Map.Entry> keyspace : keyspacesAndTablesToRepair.entrySet()) { - repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignmentsByKeyspace(repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue())); + repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignmentsForKeyspace(repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue())); } return repairAssignmentsByKeyspace; } - private List getRepairAssignmentsByKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + private List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List 
tableNames) { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); List repairAssignments = new ArrayList<>(); diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 00a7a9dc9e95..838e4ce407af 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -164,14 +164,14 @@ public Map> getRepairAssignments(AutoRepairConfig { logger.debug("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue()); Collection> tokenRanges = getTokenRanges(primaryRangeOnly, keyspace.getKey()); - repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignments(repairType, keyspace.getKey(), keyspace.getValue(), tokenRanges)); + repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignmentsForKeyspace(repairType, keyspace.getKey(), keyspace.getValue(), tokenRanges)); } return filterAndOrderRepairAssignments(repairType, repairAssignmentsByKeyspace); } @VisibleForTesting - List getRepairAssignments(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) + List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) { List repairAssignments = new ArrayList<>(); // this is used for batching minimal single assignment tables together diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 0f18d82eec3f..606737817ac4 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java 
@@ -209,7 +209,7 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); // We expect two assignments, one with table1 and table2 batched, and one with table3 assertEquals(2, assignments.size()); @@ -223,7 +223,7 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); // We expect one assignment, with two tables batched assertEquals(1, assignments.size()); @@ -236,7 +236,7 @@ public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); assertEquals(3, assignments.size()); } @@ -247,7 +247,7 @@ public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); - List assignments = repairRangeSplitter.getRepairAssignments(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = 
repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); assertEquals(1, assignments.size()); } From 4592e8d76f87bb8fa7c6ae66aaf42633f1e3d589 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 30 Nov 2024 15:47:17 -0600 Subject: [PATCH 085/257] Use memtable size for memtable only --- .../apache/cassandra/repair/autorepair/RepairRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 838e4ce407af..7dd2b5e1ff5f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -394,7 +394,7 @@ protected List getRepairAssignments(List si if (memtableSize > 0L) { logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); - SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "memtable only", estimate.sizeInRange); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "memtable only", memtableSize); repairAssignments.add(assignment); } else From 22f8bdbb74adee0bfdfbbee240c44a2622a3330f Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 1 Dec 2024 18:48:43 -0600 Subject: [PATCH 086/257] Fix dtest --- .../distributed/test/repair/AutoRepairSchedulerTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java 
b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 3c180dbdd613..caf1ad7c3ea0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -60,14 +60,14 @@ public static void init() throws IOException .set("auto_repair", ImmutableMap.of( "repair_type_overrides", - ImmutableMap.of(AutoRepairConfig.RepairType.FULL.toString(), + ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", "parallel_repair_count", "1", "parallel_repair_percentage", "0", "min_repair_interval", "1s"), - AutoRepairConfig.RepairType.INCREMENTAL.toString(), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", From 317d3183d17a16e4c2574add154257e5cca0828e Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 1 Dec 2024 19:37:52 -0600 Subject: [PATCH 087/257] Refactor IAutoRepairTokenRangeSplitter to be Iterator based Refactors IAutoRepairTokenRangeSplitter interface to be iterator based. This makes generation of RepairAssignments for priority buckets and keyspaces lazy instead of generating the RepairAssignments all at once. The main value here is that repairs can take some time, and it is beneficial to calculate repair assignments as close to when repairs would be submitted for a keyspace as possible. Additionally, introduced "PrioritizedRepairPlan" and "KeyspaceRepairPlan" types. Rather than passing around multiple-depth Maps where the contents aren't clear, create specialized types. Introduces PrioritizedRepairPlan.build for organizing a desired repair plan by grouping tables together that share the same priority. Moves RepairAssignment up to its own class to simplify the IAutoRepairTokenRangeSplitter. 
Since grouping tables by priority is now done in AutoRepair, the splitter implementations no longer have to reason with priority so all of this logic has been removed from the splitter code. Instead, they will now be given a List that is ordered by priority. --- .../repair/autorepair/AutoRepair.java | 302 +++++++++--------- .../DefaultAutoRepairTokenSplitter.java | 69 +++- .../IAutoRepairTokenRangeSplitter.java | 103 +----- .../autorepair/KeyspaceRepairAssignments.java | 53 +++ .../repair/autorepair/KeyspaceRepairPlan.java | 71 ++++ .../autorepair/PrioritizedRepairPlan.java | 160 ++++++++++ .../repair/autorepair/RepairAssignment.java | 85 +++++ .../autorepair/RepairRangeSplitter.java | 160 +++++++--- ...DefaultTokenSplitterParameterizedTest.java | 42 ++- .../AutoRepairParameterizedTest.java | 20 +- .../autorepair/PrioritizedRepairPlanTest.java | 125 ++++++++ .../autorepair/RepairRangeSplitterTest.java | 142 ++------ 12 files changed, 883 insertions(+), 449 deletions(-) create mode 100644 src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 2ec92276ec2f..1a6ad2c7db27 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -41,7 +41,6 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; -import 
org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -157,28 +156,6 @@ private int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceN return cfs != null ? cfs.metadata().params.automatedRepair.get(repairType).priority() : 0; } - /** - * @return A new map of keyspaces to tables sorted by the keyspace's underlying highest table priority. - */ - private Map> sortKeyspaceMapByTablePriority(AutoRepairConfig.RepairType repairType, Map> keyspacesAndTablesToRepair) - { - // Sort assignments by keyspace with tables with the highest priority. - List>> entriesOrderedByTablePriority = new ArrayList<>(keyspacesAndTablesToRepair.entrySet()); - entriesOrderedByTablePriority.sort((a, b) -> { - int aMaxPriority = a.getValue().stream().mapToInt((tableName) -> getPriority(repairType, a.getKey(), tableName)).max().orElse(0); - int bMaxPriority = b.getValue().stream().mapToInt((tableName) -> getPriority(repairType, b.getKey(), tableName)).max().orElse(0); - return bMaxPriority - aMaxPriority; - }); - - Map> sortedKeyspacesToRepair = new LinkedHashMap<>(); - for (Map.Entry> sortedEntries : entriesOrderedByTablePriority) - { - sortedKeyspacesToRepair.put(sortedEntries.getKey(), sortedEntries.getValue()); - } - - return sortedKeyspacesToRepair; - } - // repair runs a repair session of the given type synchronously. 
public void repair(AutoRepairConfig.RepairType repairType) { @@ -229,7 +206,7 @@ public void repair(AutoRepairConfig.RepairType repairType) repairState.setTotalTablesConsideredForRepair(0); repairState.setTotalMVTablesConsideredForRepair(0); - CollectectedRepairStats collectectedRepairStats = new CollectectedRepairStats(); + CollectedRepairStats collectedRepairStats = new CollectedRepairStats(); List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); @@ -245,136 +222,32 @@ public void repair(AutoRepairConfig.RepairType repairType) { continue; } - List tablesToBeRepairedList = retrieveTablesToBeRepaired(keyspace, config, repairType, repairState, collectectedRepairStats); + List tablesToBeRepairedList = retrieveTablesToBeRepaired(keyspace, config, repairType, repairState, collectedRepairStats); shuffleFunc.accept(tablesToBeRepairedList); keyspacesAndTablesToRepair.put(keyspace.getName(), tablesToBeRepairedList); } - // sort keyspaces to repair by table priority - Map> sortedKeyspacesToRepair = sortKeyspaceMapByTablePriority(repairType, keyspacesAndTablesToRepair); - // calculate the repair assignments for each kesypace. - Map> repairAssignmentsByKeyspace = tokenRangeSplitters.get(repairType).getRepairAssignments(repairType, primaryRangeOnly, sortedKeyspacesToRepair); + // Separate out the keyspaces and tables to repair based on their priority, with each repair plan representing a uniquely occuring priority. + List repairPlans = PrioritizedRepairPlan.build(keyspacesAndTablesToRepair, repairType, shuffleFunc); - // evaluate over each keyspace's repair assignments. - for (Map.Entry> keyspaceAssignments : repairAssignmentsByKeyspace.entrySet()) + // calculate the repair assignments for each priority:keyspace. 
+ Iterator repairAssignmentsIterator = tokenRangeSplitters.get(repairType).getRepairAssignments(repairType, primaryRangeOnly, repairPlans); + + while (repairAssignmentsIterator.hasNext()) { - String keyspaceName = keyspaceAssignments.getKey(); - List repairAssignments = keyspaceAssignments.getValue(); - repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); - - int totalRepairAssignments = repairAssignments.size(); - long keyspaceStartTime = timeFunc.get(); - RepairAssignment previousAssignment = null; - long tableStartTime = timeFunc.get(); - int totalProcessedAssignments = 0; - Set> ranges = new HashSet<>(); - for (RepairAssignment curRepairAssignment : repairAssignments) + KeyspaceRepairAssignments repairAssignments = repairAssignmentsIterator.next(); + List assignments = repairAssignments.getRepairAssignments(); + if (assignments.isEmpty()) { - try - { - totalProcessedAssignments++; - boolean repairOneTableAtATime = !config.getRepairByKeyspace(repairType); - if (previousAssignment != null && repairOneTableAtATime && !previousAssignment.tableNames.equals(curRepairAssignment.tableNames)) - { - // In the repair assignment, all the tables are appended sequnetially. - // Check if we have a different table, and if so, we should reset the table start time. 
- tableStartTime = timeFunc.get(); - } - previousAssignment = curRepairAssignment; - if (!config.isAutoRepairEnabled(repairType)) - { - logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); - repairState.setRepairInProgress(false); - return; - } - if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, sortedKeyspacesToRepair.get(keyspaceName).size())) - { - collectectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; - logger.info("Keyspace took too much time to repair hence skipping it {}", - keyspaceName); - break; - } - if (repairOneTableAtATime && AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) - { - collectectedRepairStats.skippedTokenRanges += 1; - logger.info("Table took too much time to repair hence skipping it table name {}.{}, token range {}", - keyspaceName, curRepairAssignment.tableNames, curRepairAssignment.tokenRange); - continue; - } - - Range tokenRange = curRepairAssignment.getTokenRange(); - logger.debug("Current Token Left side {}, right side {}", - tokenRange.left.toString(), - tokenRange.right.toString()); - - ranges.add(curRepairAssignment.getTokenRange()); - if ((totalProcessedAssignments % config.getRepairThreads(repairType) == 0) || - (totalProcessedAssignments == totalRepairAssignments)) - { - int retryCount = 0; - Future f = null; - while (retryCount <= config.getRepairMaxRetries()) - { - RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - Lists.newArrayList(curRepairAssignment.getTableNames()), - ranges, primaryRangeOnly); - repairState.resetWaitCondition(); - f = repairRunnableExecutors.get(repairType).submit(task); - try - { - repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); - } - catch (InterruptedException e) - { - logger.error("Exception in cond await:", e); - } - if (repairState.isSuccess()) - { - break; - } - else if (retryCount < config.getRepairMaxRetries()) 
- { - boolean cancellationStatus = f.cancel(true); - logger.warn("Repair failed for range {}-{} for {} tables {} with cancellationStatus: {} retrying after {} seconds...", - tokenRange.left, tokenRange.right, - keyspaceName, curRepairAssignment.getTableNames(), - cancellationStatus, config.getRepairRetryBackoff().toSeconds()); - sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); - } - retryCount++; - } - //check repair status - if (repairState.isSuccess()) - { - logger.info("Repair completed for range {}-{} for {} tables {}, total assignments: {}," + - "processed assignments: {}", tokenRange.left, tokenRange.right, - keyspaceName, curRepairAssignment.getTableNames(), totalRepairAssignments, totalProcessedAssignments); - collectectedRepairStats.succeededTokenRanges += ranges.size(); - } - else - { - boolean cancellationStatus = true; - if (f != null) - { - cancellationStatus = f.cancel(true); - } - //in the future we can add retry, etc. - logger.error("Repair failed for range {}-{} for {} tables {} after {} retries, total assignments: {}," + - "processed assignments: {}, cancellationStatus: {}", tokenRange.left, tokenRange.right, keyspaceName, - curRepairAssignment.getTableNames(), retryCount, totalRepairAssignments, totalProcessedAssignments, cancellationStatus); - collectectedRepairStats.failedTokenRanges += ranges.size(); - } - ranges.clear(); - } - logger.info("Repair completed for {} tables {}, range {}", keyspaceName, curRepairAssignment.getTableNames(), curRepairAssignment.getTokenRange()); - } - catch (Exception e) - { - logger.error("Exception while repairing keyspace {}:", keyspaceName, e); - } + logger.info("Skipping repairs for priorityBucket={} for keyspace={} since it yielded no assignments", repairAssignments.getPriority(), repairAssignments.getKeyspaceName()); + continue; } + + logger.info("Submitting repairs for priorityBucket={} for keyspace={} with assignmentCount={}", repairAssignments.getPriority(), 
repairAssignments.getKeyspaceName(), repairAssignments.getRepairAssignments().size()); + repairKeyspace(repairType, primaryRangeOnly, repairAssignments.getKeyspaceName(), repairAssignments.getRepairAssignments(), collectedRepairStats); } - cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, collectectedRepairStats); + + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, collectedRepairStats); } else { @@ -387,6 +260,127 @@ else if (retryCount < config.getRepairMaxRetries()) } } + private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List repairAssignments, CollectedRepairStats collectedRepairStats) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AutoRepairState repairState = repairStates.get(repairType); + + // evaluate over each keyspace's repair assignments. + repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); + + int totalRepairAssignments = repairAssignments.size(); + long keyspaceStartTime = timeFunc.get(); + RepairAssignment previousAssignment = null; + long tableStartTime = timeFunc.get(); + int totalProcessedAssignments = 0; + Set> ranges = new HashSet<>(); + for (RepairAssignment curRepairAssignment : repairAssignments) + { + try + { + totalProcessedAssignments++; + boolean repairOneTableAtATime = !config.getRepairByKeyspace(repairType); + if (previousAssignment != null && repairOneTableAtATime && !previousAssignment.tableNames.equals(curRepairAssignment.tableNames)) + { + // In the repair assignment, all the tables are appended sequnetially. + // Check if we have a different table, and if so, we should reset the table start time. 
+ tableStartTime = timeFunc.get(); + } + previousAssignment = curRepairAssignment; + if (!config.isAutoRepairEnabled(repairType)) + { + logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); + repairState.setRepairInProgress(false); + return; + } + if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, repairAssignments.size())) + { + collectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; + logger.info("Keyspace took too much time to repair hence skipping it {}", + keyspaceName); + break; + } + if (repairOneTableAtATime && AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) + { + collectedRepairStats.skippedTokenRanges += 1; + logger.info("Table took too much time to repair hence skipping it table name {}.{}, token range {}", + keyspaceName, curRepairAssignment.tableNames, curRepairAssignment.tokenRange); + continue; + } + + Range tokenRange = curRepairAssignment.getTokenRange(); + logger.debug("Current Token Left side {}, right side {}", + tokenRange.left.toString(), + tokenRange.right.toString()); + + ranges.add(curRepairAssignment.getTokenRange()); + if ((totalProcessedAssignments % config.getRepairThreads(repairType) == 0) || + (totalProcessedAssignments == totalRepairAssignments)) + { + int retryCount = 0; + Future f = null; + while (retryCount <= config.getRepairMaxRetries()) + { + RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); + repairState.resetWaitCondition(); + f = repairRunnableExecutors.get(repairType).submit(task); + try + { + repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + } + catch (InterruptedException e) + { + logger.error("Exception in cond await:", e); + } + if (repairState.isSuccess()) + { + break; + } + else if (retryCount < config.getRepairMaxRetries()) + { + boolean 
cancellationStatus = f.cancel(true); + logger.warn("Repair failed for range {}-{} for {} tables {} with cancellationStatus: {} retrying after {} seconds...", + tokenRange.left, tokenRange.right, + keyspaceName, curRepairAssignment.getTableNames(), + cancellationStatus, config.getRepairRetryBackoff().toSeconds()); + sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); + } + retryCount++; + } + //check repair status + if (repairState.isSuccess()) + { + logger.info("Repair completed for range {}-{} for {} tables {}, total assignments: {}," + + "processed assignments: {}", tokenRange.left, tokenRange.right, + keyspaceName, curRepairAssignment.getTableNames(), totalRepairAssignments, totalProcessedAssignments); + collectedRepairStats.succeededTokenRanges += ranges.size(); + } + else + { + boolean cancellationStatus = true; + if (f != null) + { + cancellationStatus = f.cancel(true); + } + //in the future we can add retry, etc. + logger.error("Repair failed for range {}-{} for {} tables {} after {} retries, total assignments: {}," + + "processed assignments: {}, cancellationStatus: {}", tokenRange.left, tokenRange.right, keyspaceName, + curRepairAssignment.getTableNames(), retryCount, totalRepairAssignments, totalProcessedAssignments, cancellationStatus); + collectedRepairStats.failedTokenRanges += ranges.size(); + } + ranges.clear(); + } + logger.info("Repair completed for {} tables {}, range {}", keyspaceName, curRepairAssignment.getTableNames(), curRepairAssignment.getTokenRange()); + } + catch (Exception e) + { + logger.error("Exception while repairing keyspace {}:", keyspaceName, e); + } + } + } + private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config, UUID myId) { if (repairState.getLastRepairTime() == 0) @@ -409,7 +403,7 @@ private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoR return false; } - private List 
retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig config, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, CollectectedRepairStats collectectedRepairStats) + private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig config, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, CollectedRepairStats collectedRepairStats) { Tables tables = keyspace.getMetadata().tables; List tablesToBeRepaired = new ArrayList<>(); @@ -425,7 +419,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon { logger.info("Repair is disabled for keyspace {} for tables: {}", keyspace.getName(), tableName); repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); - collectectedRepairStats.skippedTables++; + collectedRepairStats.skippedTables++; continue; } @@ -436,7 +430,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon { logger.info("Too many SSTables for repair for table {}.{}" + "totalSSTables {}", keyspace.getName(), tableName, totalSSTables); - collectectedRepairStats.skippedTables++; + collectedRepairStats.skippedTables++; continue; } @@ -454,7 +448,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon } private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, - long startTime, CollectectedRepairStats collectectedRepairStats) throws InterruptedException + long startTime, CollectedRepairStats collectedRepairStats) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) @@ -463,10 +457,10 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType AutoRepairUtils.removePriorityStatus(repairType, myId); } - repairState.setFailedTokenRangesCount(collectectedRepairStats.failedTokenRanges); - 
repairState.setSucceededTokenRangesCount(collectectedRepairStats.succeededTokenRanges); - repairState.setSkippedTokenRangesCount(collectectedRepairStats.skippedTokenRanges); - repairState.setSkippedTablesCount(collectectedRepairStats.skippedTables); + repairState.setFailedTokenRangesCount(collectedRepairStats.failedTokenRanges); + repairState.setSucceededTokenRangesCount(collectedRepairStats.succeededTokenRanges); + repairState.setSkippedTokenRangesCount(collectedRepairStats.skippedTokenRanges); + repairState.setSkippedTablesCount(collectedRepairStats.skippedTables); repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + @@ -498,7 +492,7 @@ public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) return repairStates.get(repairType); } - static class CollectectedRepairStats + static class CollectedRepairStats { int failedTokenRanges = 0; int succeededTokenRanges = 0; diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java index 05e6bafa3173..7bb1042b41c8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java @@ -21,9 +21,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.LinkedHashMap; +import java.util.Iterator; import java.util.List; -import java.util.Map; +import java.util.NoSuchElementException; import org.apache.cassandra.service.AutoRepairService; @@ -36,21 +36,70 @@ public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter { @Override - public Map> 
getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, Map> keyspacesAndTablesToRepair) + public Iterator getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) { - Map> repairAssignmentsByKeyspace = new LinkedHashMap<>(); - for (Map.Entry> keyspace : keyspacesAndTablesToRepair.entrySet()) + return new RepairAssignmentIterator(repairType, primaryRangeOnly, repairPlans); + } + + private class RepairAssignmentIterator implements Iterator + { + + private final AutoRepairConfig.RepairType repairType; + private final boolean primaryRangeOnly; + + private final Iterator repairPlanIterator; + + private Iterator currentIterator = null; + private PrioritizedRepairPlan currentPlan = null; + + RepairAssignmentIterator(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) + { + this.repairType = repairType; + this.primaryRangeOnly = primaryRangeOnly; + this.repairPlanIterator = repairPlans.iterator(); + } + + private synchronized Iterator currentIterator() + { + if (currentIterator == null || !currentIterator.hasNext()) + { + // Advance the repair plan iterator if the current repair plan is exhausted, but only + // if there are more repair plans. 
+ if (repairPlanIterator.hasNext()) + { + currentPlan = repairPlanIterator.next(); + currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); + } + } + + return currentIterator; + } + + @Override + public boolean hasNext() { - repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignmentsForKeyspace(repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue())); + return currentIterator().hasNext(); } - return repairAssignmentsByKeyspace; + @Override + public KeyspaceRepairAssignments next() + { + if (!currentIterator.hasNext()) + { + throw new NoSuchElementException("No remaining repair plans"); + } + + final KeyspaceRepairPlan repairPlan = currentIterator().next(); + return getRepairAssignmentsForKeyspace(repairType, primaryRangeOnly, currentPlan.getPriority(), repairPlan); + } } - private List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List tableNames) + private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, int priority, KeyspaceRepairPlan repairPlan) { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); List repairAssignments = new ArrayList<>(); + String keyspaceName = repairPlan.getKeyspaceName(); + List tableNames = repairPlan.getTableNames(); Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); if (!primaryRangeOnly) @@ -87,7 +136,7 @@ private List getRepairAssignmentsForKeyspace(AutoRepairConfig. 
} } } - reorderByPriority(repairAssignments, repairType); - return repairAssignments; + + return new KeyspaceRepairAssignments(priority, keyspaceName, repairAssignments); } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index d64916dcbd58..b51fb19ab460 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -17,17 +17,8 @@ */ package org.apache.cassandra.repair.autorepair; - -import java.util.Comparator; +import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.Objects; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; public interface IAutoRepairTokenRangeSplitter { @@ -37,94 +28,8 @@ public interface IAutoRepairTokenRangeSplitter * The autorepair framework will repair the list of returned subrange in a sequence. * @param repairType The type of repair being executed * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. - * @param keyspacesAndTablesToRepair A map keyed by keyspace name and valued by tables to generate assignments for. - * @return repair assignments broken up by range, keyspace and tables. - */ - Map> getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, Map> keyspacesAndTablesToRepair); - - /** - * Reorders the list of {@link RepairAssignment} objects based on their priority for a given repair type. - * The list is sorted in descending order, so higher priority assignments appear first. - * If two assignments have the same priority for the specified repair type, their original order is preserved. 
- * - * @param repairAssignments A list of {@link RepairAssignment} objects to be reordered. - * @param repairType The {@link AutoRepairConfig.RepairType} used to determine the priority of each assignment. - * The priority is determined using the {@link RepairAssignment#getPriority(AutoRepairConfig.RepairType)} method. + * @param repairPlans A list of ordered prioritized repair plans to generate assignments for in order. + * @return Iterator of repair assignments, with each element representing a grouping of repair assignments for a given keyspace. */ - @VisibleForTesting - default void reorderByPriority(List repairAssignments, AutoRepairConfig.RepairType repairType) - { - repairAssignments.sort(Comparator.comparingInt(a -> ((RepairAssignment) a).getPriority(repairType)).reversed()); - } - - /** - * Defines a repair assignment to be issued by the autorepair framework. - */ - class RepairAssignment - { - final Range tokenRange; - - final String keyspaceName; - - final List tableNames; - - public RepairAssignment(Range tokenRange, String keyspaceName, List tableNames) - { - this.tokenRange = tokenRange; - this.keyspaceName = keyspaceName; - this.tableNames = tableNames; - } - - public Range getTokenRange() - { - return tokenRange; - } - - public String getKeyspaceName() - { - return keyspaceName; - } - - public List getTableNames() - { - return tableNames; - } - - public int getPriority(AutoRepairConfig.RepairType type) - { - int max = 0; - for (String table : tableNames) - { - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, table); - if (cfs != null) - max = Math.max(max, cfs.metadata().params.automatedRepair.get(type).priority()); - } - return max; - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - RepairAssignment that = (RepairAssignment) o; - return Objects.equals(tokenRange, that.tokenRange) && Objects.equals(keyspaceName, that.keyspaceName) && 
Objects.equals(tableNames, that.tableNames); - } - - @Override - public int hashCode() - { - return Objects.hash(tokenRange, keyspaceName, tableNames); - } - - @Override - public String toString() - { - return "RepairAssignment{" + - "tokenRange=" + tokenRange + - ", keyspaceName='" + keyspaceName + '\'' + - ", tableNames=" + tableNames + - '}'; - } - } + Iterator getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java new file mode 100644 index 000000000000..3ea91e9922f9 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; + +/** + * A grouping of repair assignments that were generated for a particular keyspace for a given priority. 
+ */ +public class KeyspaceRepairAssignments +{ + private final int priority; + private final String keyspaceName; + private final List repairAssignments; + + public KeyspaceRepairAssignments(int priority, String keyspaceName, List repairAssignments) + { + this.priority = priority; + this.keyspaceName = keyspaceName; + this.repairAssignments = repairAssignments; + } + + public int getPriority() + { + return priority; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getRepairAssignments() + { + return repairAssignments; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java new file mode 100644 index 000000000000..3c13e3d80d08 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; +import java.util.Objects; + +/** + * Encapsulates an intent to repair the given keyspace's tables + */ +public class KeyspaceRepairPlan +{ + private final String keyspaceName; + + private final List tableNames; + + public KeyspaceRepairPlan(String keyspaceName, List tableNames) + { + this.keyspaceName = keyspaceName; + this.tableNames = tableNames; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getTableNames() + { + return tableNames; + } + + @Override + public boolean equals(Object o) + { + if (o == null || getClass() != o.getClass()) return false; + KeyspaceRepairPlan that = (KeyspaceRepairPlan) o; + return Objects.equals(keyspaceName, that.keyspaceName) && Objects.equals(tableNames, that.tableNames); + } + + @Override + public int hashCode() + { + return Objects.hash(keyspaceName, tableNames); + } + + @Override + public String toString() + { + return "KeyspaceRepairPlan{" + + "keyspaceName='" + keyspaceName + '\'' + + ", tableNames=" + tableNames + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java new file mode 100644 index 000000000000..98cdd95421b1 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeSet; +import java.util.function.Consumer; + +import org.apache.cassandra.db.ColumnFamilyStore; + +/** + * Encapsulates a devised plan to repair tables, grouped by their keyspace and a given priority. This is used + * by {@link AutoRepair} to pass in an organized plan to + * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(AutoRepairConfig.RepairType, boolean, List)} which + * can iterate over this plan in order to generate {@link RepairAssignment}s. 
+ */ +public class PrioritizedRepairPlan +{ + private final int priority; + + private final List keyspaceRepairPlans; + + public PrioritizedRepairPlan(int priority, List keyspaceRepairPlans) + { + this.priority = priority; + this.keyspaceRepairPlans = keyspaceRepairPlans; + } + + public int getPriority() + { + return priority; + } + + public List getKeyspaceRepairPlans() + { + return keyspaceRepairPlans; + } + + @Override + public boolean equals(Object o) + { + if (o == null || getClass() != o.getClass()) return false; + PrioritizedRepairPlan that = (PrioritizedRepairPlan) o; + return priority == that.priority && Objects.equals(keyspaceRepairPlans, that.keyspaceRepairPlans); + } + + @Override + public int hashCode() + { + return Objects.hash(priority, keyspaceRepairPlans); + } + + @Override + public String toString() + { + return "PrioritizedRepairPlan{" + + "priority=" + priority + + ", keyspaceRepairPlans=" + keyspaceRepairPlans + + '}'; + } + + /** + * Builds a list of {@link PrioritizedRepairPlan}s for the given keyspace and table map, ordered by priority from + * highest to lowest, where priority is derived from table schema's defined priority for the given repair type. + *

+ * If a keyspace has tables with differing priorities, those tables will be included in the PrioritizedRepairPlan + * for their given priority. + * + * @param keyspacesToTableNames A mapping of keyspace name to table names + * @param repairType The repair type that is being executed + * @param orderFunc A function to order keyspace and tables in the returned plan. + * @return Ordered list of plans by table priority. + */ + public static List build(Map> keyspacesToTableNames, AutoRepairConfig.RepairType repairType, Consumer> orderFunc) + { + // Build a map of priority -> (keyspace -> tables) + Map>> plans = new HashMap<>(); + for (Map.Entry> keyspaceToTableNames : keyspacesToTableNames.entrySet()) + { + String keyspaceName = keyspaceToTableNames.getKey(); + for (String tableName : keyspaceToTableNames.getValue()) + { + int priority = getPriority(repairType, keyspaceName, tableName); + Map> keyspacesForPriority = plans.computeIfAbsent(priority, p -> new HashMap<>()); + List tableNamesAtPriority = keyspacesForPriority.computeIfAbsent(keyspaceName, k -> new ArrayList<>()); + tableNamesAtPriority.add(tableName); + } + } + + // Extract map into a List ordered by priority from highest to lowest. + List planList = new ArrayList<>(plans.size()); + TreeSet priorities = new TreeSet<>(Comparator.reverseOrder()); + priorities.addAll(plans.keySet()); + for (int priority : priorities) + { + Map> keyspacesAndTables = plans.get(priority); + List keyspaceRepairPlans = new ArrayList<>(keyspacesAndTables.size()); + planList.add(new PrioritizedRepairPlan(priority, keyspaceRepairPlans)); + + // Order keyspace and table names based on the input function (typically, this would shuffle the keyspace + // and table names randomly).
+ List keyspaceNames = new ArrayList<>(keyspacesAndTables.keySet()); + orderFunc.accept(keyspaceNames); + + for(String keyspaceName : keyspaceNames) + { + List tableNames = keyspacesAndTables.get(keyspaceName); + orderFunc.accept(tableNames); + KeyspaceRepairPlan keyspaceRepairPlan = new KeyspaceRepairPlan(keyspaceName, new ArrayList<>(tableNames)); + keyspaceRepairPlans.add(keyspaceRepairPlan); + } + } + + return planList; + } + + /** + * Convenience method to build a repair plan for a single keyspace with tables. Primarily useful in testing. + * @param keyspaceName Keyspace to repair + * @param tableNames tables to repair for the given keyspace. + * @return Single repair plan. + */ + static List buildSingleKeyspacePlan(AutoRepairConfig.RepairType repairType, String keyspaceName, String ... tableNames) + { + Map> keyspaceMap = new HashMap<>(); + keyspaceMap.put(keyspaceName, Arrays.asList(tableNames)); + return build(keyspaceMap, repairType, (l) -> {}); + } + + /** + * @return The priority of the given table if defined, otherwise 0. + */ + private static int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + return cfs != null ? cfs.metadata().params.automatedRepair.get(repairType).priority() : 0; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java new file mode 100644 index 000000000000..c06d58e71fa2 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; +import java.util.Objects; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; + +/** + * Defines a repair assignment to be issued by the autorepair framework. + */ +public class RepairAssignment +{ + final Range tokenRange; + + final String keyspaceName; + + final List tableNames; + + public RepairAssignment(Range tokenRange, String keyspaceName, List tableNames) + { + this.tokenRange = tokenRange; + this.keyspaceName = keyspaceName; + this.tableNames = tableNames; + } + + public Range getTokenRange() + { + return tokenRange; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getTableNames() + { + return tableNames; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RepairAssignment that = (RepairAssignment) o; + return Objects.equals(tokenRange, that.tokenRange) && Objects.equals(keyspaceName, that.keyspaceName) && Objects.equals(tableNames, that.tableNames); + } + + @Override + public int hashCode() + { + return Objects.hash(tokenRange, keyspaceName, tableNames); + } + + @Override + public String toString() + { + return "RepairAssignment{" + + "tokenRange=" + tokenRange + + ", keyspaceName='" + 
keyspaceName + '\'' + + ", tableNames=" + tableNames + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 7dd2b5e1ff5f..eb70389ba147 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -23,9 +23,10 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; -import java.util.LinkedHashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -157,17 +158,68 @@ public RepairRangeSplitter(Map parameters) } @Override - public Map> getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, Map> keyspacesAndTablesToRepair) + public Iterator getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) { - Map> repairAssignmentsByKeyspace = new LinkedHashMap<>(); - for (Map.Entry> keyspace : keyspacesAndTablesToRepair.entrySet()) + return new RepairAssignmentIterator(repairType, primaryRangeOnly, repairPlans); + } + + private class RepairAssignmentIterator implements Iterator + { + + private final AutoRepairConfig.RepairType repairType; + private final boolean primaryRangeOnly; + + private final Iterator repairPlanIterator; + + private Iterator currentIterator = null; + private PrioritizedRepairPlan currentPlan = null; + private long bytesSoFar = 0; + + RepairAssignmentIterator(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) { - logger.debug("Calculating token range splits for repairType={} primaryRangeOnly={} keyspaceName={} tableNames={}", repairType, primaryRangeOnly, keyspace.getKey(), keyspace.getValue()); - Collection> tokenRanges = 
getTokenRanges(primaryRangeOnly, keyspace.getKey()); - repairAssignmentsByKeyspace.put(keyspace.getKey(), getRepairAssignmentsForKeyspace(repairType, keyspace.getKey(), keyspace.getValue(), tokenRanges)); + this.repairType = repairType; + this.primaryRangeOnly = primaryRangeOnly; + this.repairPlanIterator = repairPlans.iterator(); } - return filterAndOrderRepairAssignments(repairType, repairAssignmentsByKeyspace); + private synchronized Iterator currentIterator() + { + if (currentIterator == null || !currentIterator.hasNext()) + { + // Advance the repair plan iterator if the current repair plan is exhausted, but only + // if there are more repair plans. + if (repairPlanIterator.hasNext()) + { + currentPlan = repairPlanIterator.next(); + currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); + } + } + + return currentIterator; + } + + @Override + public boolean hasNext() + { + return currentIterator().hasNext(); + } + + @Override + public KeyspaceRepairAssignments next() + { + // Should not happen unless violating the contract of iterator of checking hasNext first. 
+ if (!currentIterator.hasNext()) + { + throw new NoSuchElementException("No remaining repair plans"); + } + + final KeyspaceRepairPlan repairPlan = currentIterator().next(); + Collection> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); + List repairAssignments = getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRanges); + FilteredRepairAssignments filteredRepairAssignments = filterRepairAssignments(repairType, currentPlan.getPriority(), repairPlan.getKeyspaceName(), repairAssignments, bytesSoFar); + bytesSoFar = filteredRepairAssignments.newBytesSoFar; + return new KeyspaceRepairAssignments(currentPlan.getPriority(), repairPlan.getKeyspaceName(), filteredRepairAssignments.repairAssignments); + } } @VisibleForTesting @@ -239,63 +291,49 @@ else if (tableAssignments.size() == 1 && } /** - * Given a repair type and map of sized-based repair assignments by keyspace order them by priority of the - * assignments' underlying tables and confine them by maxBytesPerSchedule. + * Given a repair type and map of sized-based repair assignments, confine them by maxBytesPerSchedule. * @param repairType used to determine underyling table priorities - * @param repairAssignmentsByKeyspace the assignments to filter. - * @return A list of repair assignments ordered by priority and confined by maxBytesPerSchedule. + * @param repairAssignments the assignments to filter. + * @param bytesSoFar repair assignment bytes accumulated so far. + * @return A list of repair assignments confined by maxBytesPerSchedule. */ @VisibleForTesting - Map> filterAndOrderRepairAssignments(AutoRepairConfig.RepairType repairType, Map> repairAssignmentsByKeyspace) + FilteredRepairAssignments filterRepairAssignments(AutoRepairConfig.RepairType repairType, int priority, String keyspaceName, List repairAssignments, long bytesSoFar) { // Confine repair assignments by maxBytesPerSchedule. 
- long bytesSoFar = 0L; + long bytesSoFarThisIteration = 0L; long bytesNotRepaired = 0L; int assignmentsNotRepaired = 0; int assignmentsToRepair = 0; int totalAssignments = 0; - Map> filteredRepairAssignmentsByKeyspace = new LinkedHashMap<>(); - for (Map.Entry> keyspaceAssignments : repairAssignmentsByKeyspace.entrySet()) + List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); + for (SizedRepairAssignment repairAssignment : repairAssignments) { - String keyspace = keyspaceAssignments.getKey(); - List repairAssignments = keyspaceAssignments.getValue(); - - // Reorder the repair assignments - reorderByPriority(repairAssignments, repairType); - - List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); - for (SizedRepairAssignment repairAssignment : repairAssignments) + totalAssignments++; + // skip any repair assignments that would accumulate us past the maxBytesPerSchedule + if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) { - totalAssignments++; - // skip any repair assignments that would accumulate us past the maxBytesPerSchedule - if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) - { - // log that repair assignment was skipped. - bytesNotRepaired += repairAssignment.getEstimatedBytes(); - assignmentsNotRepaired++; - logger.warn("Skipping {} because it would increase total repair bytes to {}", - repairAssignment, - getBytesOfMaxBytesPerSchedule(bytesSoFar + repairAssignment.getEstimatedBytes())); - } - else - { - bytesSoFar += repairAssignment.getEstimatedBytes(); - assignmentsToRepair++; - logger.info("Adding {}, increasing repair bytes to {}", - repairAssignment, - getBytesOfMaxBytesPerSchedule(bytesSoFar)); - assignmentsToReturn.add(repairAssignment); - } + // log that repair assignment was skipped. 
+ bytesNotRepaired += repairAssignment.getEstimatedBytes(); + assignmentsNotRepaired++; + logger.warn("Skipping {} because it would increase total repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar + repairAssignment.getEstimatedBytes())); } - - if (!assignmentsToReturn.isEmpty()) + else { - filteredRepairAssignmentsByKeyspace.put(keyspace, assignmentsToReturn); + bytesSoFar += repairAssignment.getEstimatedBytes(); + bytesSoFarThisIteration += repairAssignment.getEstimatedBytes(); + assignmentsToRepair++; + logger.info("Adding {}, increasing repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar)); + assignmentsToReturn.add(repairAssignment); } } - String message = "Returning {} assignment(s), totaling {}"; + String message = "Returning {} assignment(s) for priorityBucket {} and keyspace {}, totaling {} ({} overall)"; if (assignmentsNotRepaired != 0) { message += ". Skipping {} of {} assignment(s), totaling {}"; @@ -304,23 +342,41 @@ Map> filterAndOrderRepairAssignments(AutoRepairCo message += ". The entire primary range will not be repaired this schedule. 
" + "Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure " + "all ranges do get repaired within gc_grace_seconds"; - logger.warn(message, assignmentsToRepair, getBytesOfMaxBytesPerSchedule(bytesSoFar), + logger.warn(message, assignmentsToRepair, priority, keyspaceName, + FileUtils.stringifyFileSize(bytesSoFarThisIteration), + getBytesOfMaxBytesPerSchedule(bytesSoFar), assignmentsNotRepaired, totalAssignments, FileUtils.stringifyFileSize(bytesNotRepaired)); } else { - logger.info(message, assignmentsToRepair, getBytesOfMaxBytesPerSchedule(bytesSoFar), + logger.info(message, assignmentsToRepair, priority, keyspaceName, + FileUtils.stringifyFileSize(bytesSoFarThisIteration), + getBytesOfMaxBytesPerSchedule(bytesSoFar), assignmentsNotRepaired, totalAssignments, FileUtils.stringifyFileSize(bytesNotRepaired)); } } else { - logger.info(message, assignmentsToRepair, getBytesOfMaxBytesPerSchedule(bytesSoFar)); + logger.info(message, assignmentsToRepair, priority, keyspaceName, + FileUtils.stringifyFileSize(bytesSoFarThisIteration), + getBytesOfMaxBytesPerSchedule(bytesSoFar)); } - return filteredRepairAssignmentsByKeyspace; + return new FilteredRepairAssignments(assignmentsToReturn, bytesSoFar); + } + + private static class FilteredRepairAssignments + { + private final List repairAssignments; + private final long newBytesSoFar; + + private FilteredRepairAssignments(List repairAssignments, long newBytesSoFar) + { + this.repairAssignments = repairAssignments; + this.newBytesSoFar = newBytesSoFar; + } } private String getBytesOfMaxBytesPerSchedule(long bytes) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java index 8cee64f5a238..086d8c114897 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java @@ -24,8 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.LinkedHashMap; -import java.util.Map; +import java.util.Iterator; import org.junit.BeforeClass; import org.junit.Test; @@ -39,7 +38,6 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; @@ -47,7 +45,9 @@ import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) public class AutoRepairDefaultTokenSplitterParameterizedTest @@ -111,15 +111,17 @@ public void testTokenRangesSplitByTable() AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairSubRangeNum(repairType, numberOfSplits); - Map> keyspaceToTables = new LinkedHashMap<>(); - keyspaceToTables.put(KEYSPACE, tables); - Map> assignmentsByKeyspace = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, keyspaceToTables); - // should be 1 entry for the keyspace. - assertEquals(1, assignmentsByKeyspace.size()); - List assignments = assignmentsByKeyspace.get(KEYSPACE); - assertNotNull(assignments); + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); + + Iterator keyspaceAssignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, plan); + + // should be only 1 entry for the keyspace. 
+ assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + List assignments = keyspace.getRepairAssignments(); assertEquals(totalTokenRanges*numberOfSplits*tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); @@ -133,6 +135,7 @@ public void testTokenRangesSplitByTable() } expectedTableIndex = -1; + // should be a set of ranges for each table. for (int i = 0; i> keyspaceToTables = new LinkedHashMap<>(); - keyspaceToTables.put(KEYSPACE, tables); - Map> assignmentsByKeyspace = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, keyspaceToTables); - // should be 1 entry for the keyspace. - assertEquals(1, assignmentsByKeyspace.size()); - List assignments = assignmentsByKeyspace.get(KEYSPACE); + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); + + Iterator keyspaceAssignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, plan); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); assertNotNull(assignments); assertEquals(totalTokenRanges*numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - for (int i = 0; i < totalTokenRanges * numberOfSplits; i++) + // should only be one set of ranges for the entire keyspace. 
+ for (int i = 0; i> expectedToken = new ArrayList<>(tokens); - Map> keyspaceToTables = new LinkedHashMap<>(); - keyspaceToTables.put(KEYSPACE, Collections.singletonList(TABLE)); - Map> assignmentsByKeyspace = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, keyspaceToTables); + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE); - // should be 1 entry for the keyspace. - assertEquals(1, assignmentsByKeyspace.size()); - List assignments = assignmentsByKeyspace.get(KEYSPACE); + Iterator keyspaceAssignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, plan); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); assertNotNull(assignments); // should be 1 entry for the table which covers the full range. diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java new file mode 100644 index 000000000000..507d26437e36 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; + +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class PrioritizedRepairPlanTest extends CQLTester +{ + + @Test + public void testBuildWithDifferentPriorities() + { + // Test reordering assignments with different priorities + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3); + assertEquals(3, prioritizedRepairPlans.size()); + + // Verify the order is by descending priority and matches the expected tables + assertEquals(3, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(table2, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + + assertEquals(2, prioritizedRepairPlans.get(1).getPriority()); + assertEquals(table1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + + assertEquals(1, prioritizedRepairPlans.get(2).getPriority()); + assertEquals(table3, 
prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + } + + @Test + public void testBuildWithSamePriority() + { + // Test reordering assignments with the same priority + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + + // Expect only 1 plan since all tables share the same priority + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3); + assertEquals(1, prioritizedRepairPlans.size()); + + // Verify all tables present in the plan + assertEquals(1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().size()); + KeyspaceRepairPlan keyspaceRepairPlan = prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0); + + List tableNames = keyspaceRepairPlan.getTableNames(); + assertEquals(3, tableNames.size()); + assertEquals(table1, tableNames.get(0)); + assertEquals(table2, tableNames.get(1)); + assertEquals(table3, tableNames.get(2)); + } + + @Test + public void testBuildWithMixedPriorities() + { + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table4 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + + // Expect only 3 plans + List prioritizedRepairPlans = 
PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3, table4); + assertEquals(3, prioritizedRepairPlans.size()); + + // Verify the order is by descending priority and matches the expected tables + assertEquals(3, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(table2, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + + assertEquals(2, prioritizedRepairPlans.get(1).getPriority()); + assertEquals(table1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + assertEquals(table3, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(1)); + + assertEquals(1, prioritizedRepairPlans.get(2).getPriority()); + assertEquals(table4, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + } + + @Test + public void testBuildWithEmptyTableList() + { + // Test with an empty table list (should remain empty) + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE); + assertTrue(prioritizedRepairPlans.isEmpty()); + } + + @Test + public void testBuildWithOneTable() + { + // Test with a single element (should remain unchanged) + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '5'}"); + + // Expect only 1 plans + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1); + assertEquals(1, prioritizedRepairPlans.size()); + + // Verify the order is by descending priority and matches the expected tables + assertEquals(5, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(table1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + } + +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 606737817ac4..b41da38ed641 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -38,7 +38,6 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter.RepairAssignment; import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizedRepairAssignment; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.concurrent.Refs; @@ -64,7 +63,8 @@ public static void setUpClass() } @Before - public void setUp() { + public void setUp() + { repairRangeSplitter = new RepairRangeSplitter(Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); } @@ -100,103 +100,16 @@ public void testSizePartitionCountSplit() throws Throwable } @Test - public void testReorderByPriorityWithDifferentPriorities() { - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); - String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); - - // Test reordering assignments with different priorities - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); - RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table2)); - RepairAssignment assignment3 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table3)); - - // Assume these priorities based on the repair type - 
List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3)); - - repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); - - // Verify the order is by descending priority - assertEquals(assignment2, assignments.get(0)); - assertEquals(assignment1, assignments.get(1)); - assertEquals(assignment3, assignments.get(2)); - } - - @Test - public void testReorderByPriorityWithSamePriority() { - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - - // Test reordering assignments with the same priority - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); - RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table2)); - RepairAssignment assignment3 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table3)); - - List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3)); - - repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); - - // Verify the original order is preserved as all priorities are the same - assertEquals(assignment1, assignments.get(0)); - assertEquals(assignment2, assignments.get(1)); - assertEquals(assignment3, assignments.get(2)); - } - - @Test - public void testReorderByPriorityWithMixedPriorities() { - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); - String 
table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table4 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); - - // Test reordering assignments with mixed priorities - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); - RepairAssignment assignment2 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table2)); - RepairAssignment assignment3 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table3)); - RepairAssignment assignment4 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table4)); - - List assignments = new ArrayList<>(Arrays.asList(assignment1, assignment2, assignment3, assignment4)); - - repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); - - // Verify the order: highest priority first, then preserved order for same priority - assertEquals(assignment2, assignments.get(0)); // Priority 3 - assertEquals(assignment1, assignments.get(1)); // Priority 2 - assertEquals(assignment3, assignments.get(2)); // Priority 2 - assertEquals(assignment4, assignments.get(3)); // Priority 1 - } - - @Test - public void testReorderByPriorityWithEmptyList() { - // Test with an empty list (should remain empty) - List assignments = new ArrayList<>(); - repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); - assertTrue(assignments.isEmpty()); - } - - @Test - public void testReorderByPriorityWithOneElement() { - // Test with a single element (should remain unchanged) - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '5'}"); - - RepairAssignment assignment1 = new RepairAssignment(FULL_RANGE, KEYSPACE, Collections.singletonList(table1)); - - List assignments = new 
ArrayList<>(Collections.singletonList(assignment1)); - - repairRangeSplitter.reorderByPriority(assignments, AutoRepairConfig.RepairType.FULL); - - assertEquals(assignment1, assignments.get(0)); // Single element should remain in place - } - - @Test - public void testGetRepairAssignmentsForTable_NoSSTables() { + public void testGetRepairAssignmentsForTable_NoSSTables() + { Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); assertEquals(0, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_Single() throws Throwable { + public void testGetRepairAssignmentsForTable_Single() throws Throwable + { Collection> ranges = Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); insertAndFlushSingleTable(tableName); List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); @@ -204,7 +117,8 @@ public void testGetRepairAssignmentsForTable_Single() throws Throwable { } @Test - public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { + public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable + { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); Collection> ranges = Collections.singleton(FULL_RANGE); @@ -218,7 +132,8 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { } @Test - public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { + public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable + { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); 
Collection> ranges = Collections.singleton(FULL_RANGE); @@ -231,7 +146,8 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { } @Test - public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { + public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable + { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "1")); Collection> ranges = Collections.singleton(FULL_RANGE); @@ -242,7 +158,8 @@ public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { } @Test - public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { + public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable + { repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "100")); Collection> ranges = Collections.singleton(FULL_RANGE); @@ -253,14 +170,16 @@ public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { } @Test(expected = IllegalStateException.class) - public void testMergeEmptyAssignments() { + public void testMergeEmptyAssignments() + { // Test when the list of assignments is empty List emptyAssignments = Collections.emptyList(); RepairRangeSplitter.merge(emptyAssignments); } @Test - public void testMergeSingleAssignment() { + public void testMergeSingleAssignment() + { // Test when there is only one assignment in the list String keyspaceName = "testKeyspace"; List tableNames = Arrays.asList("table1", "table2"); @@ -276,7 +195,8 @@ public void testMergeSingleAssignment() { } @Test - public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { + public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() + { // Test merging multiple assignments with the same token range and keyspace String keyspaceName = "testKeyspace"; List tableNames1 = Arrays.asList("table1", "table2"); @@ -294,7 +214,8 @@ public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() { 
} @Test(expected = IllegalStateException.class) - public void testMergeDifferentTokenRange() { + public void testMergeDifferentTokenRange() + { // Test merging assignments with different token ranges Iterator> range = AutoRepairUtils.split(FULL_RANGE, 2).iterator(); // Split the full range into two ranges ie (0-100, 100-200 Range tokenRange1 = range.next(); @@ -312,7 +233,8 @@ public void testMergeDifferentTokenRange() { } @Test(expected = IllegalStateException.class) - public void testMergeDifferentKeyspaceName() { + public void testMergeDifferentKeyspaceName() + { // Test merging assignments with different keyspace names List tableNames = Arrays.asList("table1", "table2"); @@ -324,7 +246,8 @@ public void testMergeDifferentKeyspaceName() { } @Test - public void testMergeWithDuplicateTables() { + public void testMergeWithDuplicateTables() + { // Test merging assignments with duplicate table names String keyspaceName = "testKeyspace"; List tableNames1 = Arrays.asList("table1", "table2"); @@ -341,12 +264,14 @@ public void testMergeWithDuplicateTables() { } - private void insertAndFlushSingleTable(String tableName) throws Throwable { + private void insertAndFlushSingleTable(String tableName) throws Throwable + { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); flush(); } - private List createAndInsertTables(int count) throws Throwable { + private List createAndInsertTables(int count) throws Throwable + { List tableNames = new ArrayList<>(); for (int i = 0; i < count; i++) { String tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); @@ -356,11 +281,14 @@ private List createAndInsertTables(int count) throws Throwable { return tableNames; } - private void insertAndFlushTable(String tableName) throws Throwable { + private void insertAndFlushTable(String tableName) throws Throwable + { insertAndFlushTable(tableName, 1); ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); } - private void insertAndFlushTable(String tableName, 
int... vals) throws Throwable { + + private void insertAndFlushTable(String tableName, int... vals) throws Throwable + { for (int i : vals) { executeFormattedQuery("INSERT INTO " + KEYSPACE + '.' + tableName + " (k, v) values (?, ?)", i, i); From 0e1b290d4e8bf8353e545ad916c61ee79b2ca1ed Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 1 Dec 2024 22:42:10 -0600 Subject: [PATCH 088/257] Add more priority and range splitter tests --- .../repair/autorepair/AutoRepair.java | 9 +-- .../autorepair/PrioritizedRepairPlan.java | 2 +- .../autorepair/RepairRangeSplitter.java | 7 +- .../AutoRepairParameterizedTest.java | 27 +++---- .../autorepair/PrioritizedRepairPlanTest.java | 54 +++++++++++-- .../autorepair/RepairRangeSplitterTest.java | 75 +++++++++++++++---- 6 files changed, 129 insertions(+), 45 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1a6ad2c7db27..6dcbf4e78696 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -84,7 +84,9 @@ public class AutoRepair protected final Map repairStates; @VisibleForTesting - protected static Consumer> shuffleFunc = java.util.Collections::shuffle; + // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair + // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. 
+ protected static Consumer> shuffleFunc = java.util.Collections::shuffle; @VisibleForTesting protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; @@ -210,10 +212,6 @@ public void repair(AutoRepairConfig.RepairType repairType) List keyspaces = new ArrayList<>(); Keyspace.all().forEach(keyspaces::add); - // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair - // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. - shuffleFunc.accept(keyspaces); - // Filter out keyspaces and tables to repair and group into a map by keyspace. Map> keyspacesAndTablesToRepair = new LinkedHashMap<>(); for (Keyspace keyspace : keyspaces) @@ -223,7 +221,6 @@ public void repair(AutoRepairConfig.RepairType repairType) continue; } List tablesToBeRepairedList = retrieveTablesToBeRepaired(keyspace, config, repairType, repairState, collectedRepairStats); - shuffleFunc.accept(tablesToBeRepairedList); keyspacesAndTablesToRepair.put(keyspace.getName(), tablesToBeRepairedList); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java index 98cdd95421b1..6b10e5fe6b3a 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java +++ b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java @@ -93,7 +93,7 @@ public String toString() * @param orderFunc A function to order keyspace and tables in the returned plan. * @return Ordered list of plan's by table priorities. 
*/ - public static List build(Map> keyspacesToTableNames, AutoRepairConfig.RepairType repairType, Consumer> orderFunc) + public static List build(Map> keyspacesToTableNames, AutoRepairConfig.RepairType repairType, Consumer> orderFunc) { // Build a map of priority -> (keyspace -> tables) Map>> plans = new HashMap<>(); diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index eb70389ba147..895cde20542d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -367,10 +367,11 @@ FilteredRepairAssignments filterRepairAssignments(AutoRepairConfig.RepairType re return new FilteredRepairAssignments(assignmentsToReturn, bytesSoFar); } - private static class FilteredRepairAssignments + @VisibleForTesting + static class FilteredRepairAssignments { - private final List repairAssignments; - private final long newBytesSoFar; + final List repairAssignments; + final long newBytesSoFar; private FilteredRepairAssignments(List repairAssignments, long newBytesSoFar) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 5980ba482abf..a1f7a310d85d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -71,6 +71,7 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -592,28 +593,28 @@ public void 
testRepairShufflesKeyspacesAndTables() { AtomicInteger shuffleKeyspacesCall = new AtomicInteger(); AtomicInteger shuffleTablesCall = new AtomicInteger(); - AutoRepair.shuffleFunc = (List list) -> { - if (!list.isEmpty()) + AtomicInteger keyspaceCount = new AtomicInteger(); + AutoRepair.shuffleFunc = (List list) -> { + // check whether was invoked for keyspaces or tables + if (list.contains(KEYSPACE)) { - assertTrue(list.get(0) instanceof Keyspace || list.get(0) instanceof String); - if (list.get(0) instanceof Keyspace) - { - shuffleKeyspacesCall.getAndIncrement(); - assertFalse(list.isEmpty()); - } - else if (list.get(0) instanceof String) - { - shuffleTablesCall.getAndIncrement(); - } + shuffleKeyspacesCall.getAndIncrement(); + keyspaceCount.set(list.size()); } + else + // presume list not containing a keyspace is for tables. + shuffleTablesCall.getAndIncrement(); }; AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repair(repairType); + // Expect a single invocation for keyspaces assertEquals(1, shuffleKeyspacesCall.get()); - assertEquals(5, shuffleTablesCall.get()); + // Expect an invocation for tables for each keyspace + assertNotEquals(0, keyspaceCount.get()); + assertEquals(keyspaceCount.get(), shuffleTablesCall.get()); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java index 507d26437e36..011df9d1592e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -18,8 +18,11 @@ package org.apache.cassandra.repair.autorepair; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import com.google.common.collect.Lists; import org.junit.Test; import org.apache.cassandra.cql3.CQLTester; @@ -78,25 +81,60 
@@ public void testBuildWithSamePriority() @Test public void testBuildWithMixedPriorities() { - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); - String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table4 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); - - // Expect only 3 plans - List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3, table4); - assertEquals(3, prioritizedRepairPlans.size()); + String ks1 = createKeyspace("CREATE KEYSPACE %s WITH replication={ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"); + String table1 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table2 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); + String table3 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table4 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + // No priority table should be bucketed at priority 0 + String table5 = createTable(ks1,"CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + + // Create a new keyspace to ensure its tables get grouped with appropriate priority bucket + String ks2 = createKeyspace("CREATE KEYSPACE %s WITH replication={ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"); + String table6 = createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + String table7 = 
createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + + Map> keyspaceToTableMap = new HashMap<>(); + keyspaceToTableMap.put(ks1, Lists.newArrayList(table1, table2, table3, table4, table5)); + keyspaceToTableMap.put(ks2, Lists.newArrayList(table6, table7)); + + // Expect 4 plans + List prioritizedRepairPlans = PrioritizedRepairPlan.build(keyspaceToTableMap, AutoRepairConfig.RepairType.FULL, java.util.Collections::sort); + assertEquals(4, prioritizedRepairPlans.size()); // Verify the order is by descending priority and matches the expected tables assertEquals(3, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().size()); + assertEquals(ks1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getKeyspaceName()); assertEquals(table2, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); assertEquals(2, prioritizedRepairPlans.get(1).getPriority()); + assertEquals(1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().size()); + + assertEquals(ks1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getKeyspaceName()); assertEquals(table1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); assertEquals(table3, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(1)); assertEquals(1, prioritizedRepairPlans.get(2).getPriority()); + // 2 keyspaces should be present at priority 1 + assertEquals(2, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().size()); + // ks1.table4 expected in first plan + assertEquals(ks1, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getKeyspaceName()); assertEquals(table4, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + // ks2.table7 expected in second plan + assertEquals(ks2, 
prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(1).getKeyspaceName()); + assertEquals(table7, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(1).getTableNames().get(0)); + + // Tables without priority should get bucketed at priority 0 + assertEquals(0, prioritizedRepairPlans.get(3).getPriority()); + // 2 keyspaces expected + assertEquals(2, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().size()); + // ks1.table5 expected in first plan + assertEquals(ks1, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(0).getKeyspaceName()); + assertEquals(table5, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + // ks2.table6 expected in second plan + assertEquals(ks2, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(1).getKeyspaceName()); + assertEquals(table6, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(1).getTableNames().get(0)); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index b41da38ed641..9baaecca545e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -22,15 +22,18 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.config.DataStorageSpec.LongMebibytesBound; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.ColumnFamilyStore; @@ -38,10 +41,15 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import 
org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.FilteredRepairAssignments; +import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizeEstimate; import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizedRepairAssignment; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.concurrent.Refs; +import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.MAX_BYTES_PER_SCHEDULE; +import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SUBRANGE_SIZE; import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -57,8 +65,7 @@ public static void setUpClass() { CQLTester.setUpClass(); AutoRepairService.setup(); - // TODO: For now, always repair by keyspace to exercise priority tests - AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(AutoRepairConfig.RepairType.FULL, true); + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); } @@ -73,9 +80,9 @@ public void setUp() public void testSizePartitionCount() throws Throwable { insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - Refs sstables = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); + Refs sstables = RepairRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); - RepairRangeSplitter.SizeEstimate sizes = RepairRangeSplitter.getSizesForRangeOfSSTables(AutoRepairConfig.RepairType.FULL, KEYSPACE, 
tableName, FULL_RANGE, sstables); + SizeEstimate sizes = RepairRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); assertEquals(10, sizes.partitions); } @@ -91,10 +98,10 @@ public void testSizePartitionCountSplit() throws Throwable Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); - Refs sstables1 = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange1); - Refs sstables2 = RepairRangeSplitter.getSSTableReaderRefs(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange2); - RepairRangeSplitter.SizeEstimate sizes1 = RepairRangeSplitter.getSizesForRangeOfSSTables(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); - RepairRangeSplitter.SizeEstimate sizes2 = RepairRangeSplitter.getSizesForRangeOfSSTables(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); + Refs sstables1 = RepairRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2); + SizeEstimate sizes1 = RepairRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); + SizeEstimate sizes2 = RepairRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); // +-5% because HLL merge and the applying of range size approx ratio causes estimation errors assertTrue(Math.abs(10000 - (sizes1.partitions + sizes2.partitions)) <= 60); } @@ -103,7 +110,7 @@ public void testSizePartitionCountSplit() throws Throwable public void testGetRepairAssignmentsForTable_NoSSTables() { Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); - List assignments = 
repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); assertEquals(0, assignments.size()); } @@ -112,7 +119,7 @@ public void testGetRepairAssignmentsForTable_Single() throws Throwable { Collection> ranges = Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); insertAndFlushSingleTable(tableName); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(AutoRepairConfig.RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); assertEquals(1, assignments.size()); } @@ -123,7 +130,7 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); // We expect two assignments, one with table1 and table2 batched, and one with table3 assertEquals(2, assignments.size()); @@ -138,7 +145,7 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); // We expect one assignment, with two tables batched assertEquals(1, 
assignments.size()); @@ -152,7 +159,7 @@ public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); assertEquals(3, assignments.size()); } @@ -164,7 +171,7 @@ public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); assertEquals(1, assignments.size()); } @@ -263,6 +270,46 @@ public void testMergeWithDuplicateTables() assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); } + @Test + public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxBytesPerSchedule() + { + // Ensures that getRepairAssignments splits by SUBRANGE_SIZE and filterRepairAssignments limits by MAX_BYTES_PER_SCHEDULE. 
+ Map parameters = new HashMap<>(); + parameters.put(SUBRANGE_SIZE, "50GiB"); + parameters.put(MAX_BYTES_PER_SCHEDULE, "100GiB"); + repairRangeSplitter = new RepairRangeSplitter(parameters); + + // Given a size estimate of 1024GiB, we should expect 21 splits (50GiB*21 = 1050GiB > 1024GiB) + SizeEstimate sizeEstimate = sizeEstimateByBytes(RepairType.INCREMENTAL, new LongMebibytesBound("1024GiB")); + + List assignments = repairRangeSplitter.getRepairAssignments(Collections.singletonList(sizeEstimate)); + + // Should be 21 assignments, each being ~48.76 GiB + assertEquals(21, assignments.size()); + long expectedBytes = 52357696560L; + for (int i = 0; i < assignments.size(); i++) + { + SizedRepairAssignment assignment = assignments.get(i); + assertEquals("Did not get expected value for assignment " + i, 52357696560L, assignment.getEstimatedBytes()); + } + + // When filtering we should only get 2 assignments back (48.76 * 2 < 100GiB) + FilteredRepairAssignments filteredRepairAssignments = repairRangeSplitter.filterRepairAssignments(RepairType.INCREMENTAL, 0, KEYSPACE, assignments, 0); + List finalRepairAssignments = filteredRepairAssignments.repairAssignments; + assertEquals(2, finalRepairAssignments.size()); + assertEquals(expectedBytes*2, filteredRepairAssignments.newBytesSoFar); + } + + private SizeEstimate sizeEstimateByBytes(RepairType repairType, LongMebibytesBound totalSize) + { + return sizeEstimateByBytes(repairType, totalSize, totalSize); + } + + private SizeEstimate sizeEstimateByBytes(RepairType repairType, LongMebibytesBound sizeInRange, LongMebibytesBound totalSize) + { + return new SizeEstimate(repairType, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); + } + private void insertAndFlushSingleTable(String tableName) throws Throwable { From 671a653a1aa2e98bf83097de289a438067964ee3 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 10 Dec 2024 11:45:02 -0800 Subject: [PATCH 089/257] Refactor a missed change during the
rebase --- .../repair/autorepair/AutoRepairParameterizedTest.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a1f7a310d85d..c07c7a5ae137 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -578,13 +578,9 @@ public void testDefaultAutomatedRepair() for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), - cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); - Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), - cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); - Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), - cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); + cfm.params.automatedRepair.get(repairType).repairEnabled()); Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), - cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); + cfmDisabledAutoRepair.params.automatedRepair.get(repairType).repairEnabled()); } } From 32c701a182626fc1e5295ef2aa283c785ed37de5 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Fri, 6 Dec 2024 20:38:23 -0600 Subject: [PATCH 090/257] Set thread name based on RepairType.getConfigName() Changing to upper-case enums caused the executor name to be: 
AutoRepair-Repair-FULL instead of AutoRepair-Repair-full --- .../org/apache/cassandra/repair/autorepair/AutoRepair.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 6dcbf4e78696..bb64e8f48463 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -104,8 +104,8 @@ protected AutoRepair() repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType, Thread.NORM_PRIORITY)); - repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType, Thread.NORM_PRIORITY)); + repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); tokenRangeSplitters.put(repairType, FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); } From 589c0213bbce8b58ca3c5b3d598eeb5bacd13599 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 8 Dec 2024 14:01:56 -0800 Subject: [PATCH 091/257] Flatten the auto_repair table property --- pylib/cqlshlib/test/test_cqlsh_output.py | 3 +- .../statements/schema/TableAttributes.java | 10 +- .../repair/autorepair/AutoRepair.java | 4 +- .../autorepair/PrioritizedRepairPlan.java | 2 +- .../cassandra/schema/AutoRepairParams.java | 104 +++++++++++------- .../cassandra/schema/SchemaKeyspace.java | 17 +-- 
.../apache/cassandra/schema/TableParams.java | 61 ++-------- .../statements/DescribeStatementTest.java | 8 +- .../cassandra/db/SchemaCQLHelperTest.java | 4 +- .../AutoRepairParameterizedTest.java | 18 ++- .../autorepair/PrioritizedRepairPlanTest.java | 24 ++-- 11 files changed, 112 insertions(+), 143 deletions(-) diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py index a2524d032cb6..ec400cf7d735 100644 --- a/pylib/cqlshlib/test/test_cqlsh_output.py +++ b/pylib/cqlshlib/test/test_cqlsh_output.py @@ -702,8 +702,7 @@ def test_describe_columnfamily_output(self): AND transactional_mode = 'off' AND transactional_migration_from = 'none' AND speculative_retry = '99p' - AND repair_full = {'enabled': 'true'} - AND repair_incremental = {'enabled': 'true'};""" % quote_name(get_keyspace())) + AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};""" % quote_name(get_keyspace())) with cqlsh_testrun(tty=True, env=self.default_env) as c: for cmdword in ('describe table', 'desc columnfamily'): diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index e9b495d7d9fd..5d5f1ba903a9 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -198,14 +198,8 @@ private TableParams build(TableParams.Builder builder) if (hasOption(Option.TRANSACTIONAL_MIGRATION_FROM)) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); - if (hasOption(Option.REPAIR_FULL)) - builder.automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.FULL, getMap(Option.REPAIR_FULL))); - - if (hasOption(Option.REPAIR_INCREMENTAL)) - 
builder.automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.INCREMENTAL, getMap(Option.REPAIR_INCREMENTAL))); - - if (hasOption(Option.REPAIR_PREVIEW_REPAIRED)) - builder.automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, getMap(Option.REPAIR_PREVIEW_REPAIRED))); + if (hasOption(Option.AUTO_REPAIR)) + builder.automatedRepair(AutoRepairParams.fromMap(getMap(Option.AUTO_REPAIR))); return builder.build(); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index bb64e8f48463..eafd40eaddf7 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -155,7 +155,7 @@ public void repairAsync(AutoRepairConfig.RepairType repairType) private int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) { ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); - return cfs != null ? cfs.metadata().params.automatedRepair.get(repairType).priority() : 0; + return cfs != null ? cfs.metadata().params.autoRepair.priority() : 0; } // repair runs a repair session of the given type synchronously. 
@@ -412,7 +412,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon String tableName = tableMetadata.name; ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); - if (!columnFamilyStore.metadata().params.automatedRepair.get(repairType).repairEnabled()) + if (!columnFamilyStore.metadata().params.autoRepair.repairEnabled(repairType)) { logger.info("Repair is disabled for keyspace {} for tables: {}", keyspace.getName(), tableName); repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); diff --git a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java index 6b10e5fe6b3a..857fbe19b406 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java +++ b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java @@ -155,6 +155,6 @@ static List buildSingleKeyspacePlan(AutoRepairConfig.Repa private static int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) { ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); - return cfs != null ? cfs.metadata().params.automatedRepair.get(repairType).priority() : 0; + return cfs != null ? 
cfs.metadata().params.autoRepair.priority() : 0; } } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index feadbe4e227e..2b289a8c3b44 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -18,10 +18,9 @@ package org.apache.cassandra.schema; import java.util.Arrays; -import java.util.EnumMap; -import java.util.HashMap; import java.util.Map; import java.util.Objects; +import java.util.TreeMap; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableMap; @@ -37,7 +36,9 @@ public final class AutoRepairParams { public enum Option { - ENABLED, + FULL_ENABLED, + INCREMENTAL_ENABLED, + PREVIEW_REPAIRED_ENABLED, PRIORITY; @Override @@ -47,31 +48,29 @@ public String toString() } } - public static final Map DEFAULT_SUB_OPTIONS = ImmutableMap.of( - Option.ENABLED.toString(), Boolean.toString(true), - Option.PRIORITY.toString(), "0" - ); - - public static final Map> DEFAULT_OPTIONS = - ImmutableMap.of(AutoRepairConfig.RepairType.FULL, DEFAULT_SUB_OPTIONS, - AutoRepairConfig.RepairType.INCREMENTAL, DEFAULT_SUB_OPTIONS, - AutoRepairConfig.RepairType.PREVIEW_REPAIRED, DEFAULT_SUB_OPTIONS); - - public final AutoRepairConfig.RepairType type; + private ImmutableMap options; - private Map> options = DEFAULT_OPTIONS; + public static final Map DEFAULT_OPTIONS = ImmutableMap.of( + Option.FULL_ENABLED.name().toLowerCase(), Boolean.toString(true), + Option.INCREMENTAL_ENABLED.name().toLowerCase(), Boolean.toString(true), + Option.PREVIEW_REPAIRED_ENABLED.name().toLowerCase(), Boolean.toString(true), + Option.PRIORITY.toString(), "0" + ); - AutoRepairParams(AutoRepairConfig.RepairType type) + AutoRepairParams(Map options) { - this.type = type; + this.options = ImmutableMap.copyOf(options); } - public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Map options) + public 
static final AutoRepairParams DEFAULT = + new AutoRepairParams(DEFAULT_OPTIONS); + + public static AutoRepairParams create(Map options) { - Map> optionsMap = new EnumMap<>(AutoRepairConfig.RepairType.class); - for (Map.Entry> entry : DEFAULT_OPTIONS.entrySet()) + Map optionsMap = new TreeMap<>(); + for (Map.Entry entry : DEFAULT_OPTIONS.entrySet()) { - optionsMap.put(entry.getKey(), new HashMap<>(entry.getValue())); + optionsMap.put(entry.getKey(), entry.getValue()); } if (options != null) { @@ -81,38 +80,61 @@ public static AutoRepairParams create(AutoRepairConfig.RepairType repairType, Ma { throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); } - optionsMap.get(repairType).put(entry.getKey(), entry.getValue()); + optionsMap.put(entry.getKey(), entry.getValue()); } } - AutoRepairParams repairParams = new AutoRepairParams(repairType); - repairParams.options = optionsMap; - return repairParams; + return new AutoRepairParams(optionsMap); } - public boolean repairEnabled() + public boolean repairEnabled(AutoRepairConfig.RepairType type) { - String enabled = options.get(type).get(Option.ENABLED.toString()); + String option = type.toString().toLowerCase() + "_enabled"; + String enabled = options.get(option); return enabled == null - ? Boolean.parseBoolean(DEFAULT_OPTIONS.get(type).get(Option.ENABLED.toString())) + ? Boolean.parseBoolean(DEFAULT_OPTIONS.get(option)) : Boolean.parseBoolean(enabled); } public int priority() { - String priority = options.get(type).get(Option.PRIORITY.toString()); + String priority = options.get(Option.PRIORITY.toString()); return priority == null - ? Integer.parseInt(DEFAULT_OPTIONS.get(type).get(Option.PRIORITY.toString())) + ? 
Integer.parseInt(DEFAULT_OPTIONS.get(Option.PRIORITY.toString())) : Integer.parseInt(priority); } public void validate() { - String enabled = options.get(type).get(Option.ENABLED.toString()); - if (enabled != null && !isValidBoolean(enabled)) + for (Option option : Option.values()) + { + if (!options.containsKey(option.toString().toLowerCase())) + { + throw new ConfigurationException(format("Missing repair sub-option '%s'", option)); + } + } + if (options.get(Option.FULL_ENABLED.toString().toLowerCase()) != null && !isValidBoolean(options.get(Option.FULL_ENABLED.toString().toLowerCase()))) { throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", - enabled, - Option.ENABLED)); + options.get(Option.FULL_ENABLED.toString().toLowerCase()), + Option.FULL_ENABLED)); + } + if (options.get(Option.INCREMENTAL_ENABLED.toString().toLowerCase()) != null && !isValidBoolean(options.get(Option.INCREMENTAL_ENABLED.toString().toLowerCase()))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", + options.get(Option.INCREMENTAL_ENABLED.toString().toLowerCase()), + Option.INCREMENTAL_ENABLED)); + } + if (options.get(Option.PREVIEW_REPAIRED_ENABLED.toString().toLowerCase()) != null && !isValidBoolean(options.get(Option.PREVIEW_REPAIRED_ENABLED.toString().toLowerCase()))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", + options.get(Option.PREVIEW_REPAIRED_ENABLED.toString().toLowerCase()), + Option.PREVIEW_REPAIRED_ENABLED)); + } + if (options.get(Option.PRIORITY.toString().toLowerCase()) != null && !isValidInt(options.get(Option.PRIORITY.toString().toLowerCase()))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be an integer", + options.get(Option.PRIORITY.toString().toLowerCase()), + Option.PRIORITY)); } } @@ -121,19 +143,25 @@ public static boolean 
isValidBoolean(String value) return StringUtils.equalsIgnoreCase(value, "true") || StringUtils.equalsIgnoreCase(value, "false"); } + public static boolean isValidInt(String value) + { + return StringUtils.isNumeric(value); + } + + public Map options() { - return options.get(type); + return options; } - public static AutoRepairParams fromMap(AutoRepairConfig.RepairType repairType, Map map) + public static AutoRepairParams fromMap(Map map) { - return create(repairType, map); + return create(map); } public Map asMap() { - return options.get(type); + return options; } @Override diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index ee219188261b..f7fe80d83d4c 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -133,9 +133,7 @@ private SchemaKeyspace() + "cdc boolean," + "read_repair text," + "fast_path frozen>," - + "repair_full frozen>," - + "repair_incremental frozen>," - + "repair_preview_repaired frozen>," + + "auto_repair frozen>," + "PRIMARY KEY ((keyspace_name), table_name))"); private static final TableMetadata Columns = @@ -220,9 +218,7 @@ private SchemaKeyspace() + "additional_write_policy text," + "cdc boolean," + "read_repair text," - + "repair_full frozen>," - + "repair_incremental frozen>," - + "repair_preview_repaired frozen>," + + "auto_repair frozen>," + "PRIMARY KEY ((keyspace_name), view_name))"); private static final TableMetadata Indexes = @@ -600,9 +596,7 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) .add("extensions", params.extensions) - .add("repair_full", params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).asMap()) - .add("repair_incremental", params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).asMap()) - 
.add("repair_preview_repaired", params.automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED).asMap()); + .add("auto_repair", params.autoRepair.asMap()); // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ @@ -1093,9 +1087,8 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .automatedRepairFull(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.FULL, row.getFrozenTextMap("repair_full"))) - .automatedRepairIncremental(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.INCREMENTAL, row.getFrozenTextMap("repair_incremental"))) - .automatedRepairPreviewRepaired(AutoRepairParams.fromMap(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, row.getFrozenTextMap("repair_preview_repaired"))); + .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))) +; // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index d769f88a4452..7bb1a6a54e6d 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.EnumMap; import java.util.Map; import java.util.Map.Entry; @@ -39,7 +38,6 @@ import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import 
org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -103,9 +101,7 @@ public enum Option TRANSACTIONAL_MODE, TRANSACTIONAL_MIGRATION_FROM, PENDING_DROP, - REPAIR_FULL, - REPAIR_INCREMENTAL, - REPAIR_PREVIEW_REPAIRED, + AUTO_REPAIR, ; @Override @@ -139,7 +135,7 @@ public String toString() public final TransactionalMigrationFromMode transactionalMigrationFrom; public final boolean pendingDrop; - public final Map automatedRepair; + public final AutoRepairParams autoRepair; private TableParams(Builder builder) { @@ -169,14 +165,7 @@ private TableParams(Builder builder) transactionalMigrationFrom = builder.transactionalMigrationFrom; pendingDrop = builder.pendingDrop; checkNotNull(transactionalMigrationFrom); - automatedRepair = new EnumMap(AutoRepairConfig.RepairType.class) - { - { - put(AutoRepairConfig.RepairType.FULL, builder.automatedRepairFull); - put(AutoRepairConfig.RepairType.INCREMENTAL, builder.automatedRepairIncremental); - put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, builder.automatedRepairPreviewRepaired); - } - }; + autoRepair = builder.autoRepair; } public static Builder builder() @@ -209,9 +198,7 @@ public static Builder builder(TableParams params) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) .pendingDrop(params.pendingDrop) - .automatedRepairFull(params.automatedRepair.get(AutoRepairConfig.RepairType.FULL)) - .automatedRepairIncremental(params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL)) - .automatedRepairPreviewRepaired(params.automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED)) + .automatedRepair(params.autoRepair) ; } @@ -271,10 +258,7 @@ public void validate() if (transactionalMode.isTestMode() && !CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getBoolean()) fail("Transactional mode " + transactionalMode + " can't be used if " + CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getKey() + " is not set"); - for (Map.Entry entry : 
automatedRepair.entrySet()) - { - entry.getValue().validate(); - } + autoRepair.validate(); } private static void fail(String format, Object... args) @@ -316,7 +300,7 @@ public boolean equals(Object o) && transactionalMode == p.transactionalMode && transactionalMigrationFrom == p.transactionalMigrationFrom && pendingDrop == p.pendingDrop - && automatedRepair.equals(p.automatedRepair); + && autoRepair.equals(p.autoRepair); } @Override @@ -345,7 +329,7 @@ public int hashCode() transactionalMode, transactionalMigrationFrom, pendingDrop, - automatedRepair); + autoRepair); } @Override @@ -376,9 +360,7 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) - .add(Option.REPAIR_FULL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.FULL)) - .add(Option.REPAIR_INCREMENTAL.toString(), automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL)) - .add(Option.REPAIR_PREVIEW_REPAIRED.toString(), automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED)) + .add(Option.AUTO_REPAIR.toString(), autoRepair) .toString(); } @@ -441,11 +423,7 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) .newLine() - .append("AND repair_full = ").append(automatedRepair.get(AutoRepairConfig.RepairType.FULL).asMap()) - .newLine() - .append("AND repair_incremental = ").append(automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).asMap()) - .newLine() - .append("AND repair_preview_repaired = ").append(automatedRepair.get(AutoRepairConfig.RepairType.PREVIEW_REPAIRED).asMap()); + .append("AND auto_repair = ").append(autoRepair.asMap()); } public static final class Builder @@ -474,10 +452,7 @@ public static final class Builder public TransactionalMigrationFromMode transactionalMigrationFrom = 
TransactionalMigrationFromMode.none; public boolean pendingDrop = false; - private AutoRepairParams automatedRepairFull = new AutoRepairParams(AutoRepairConfig.RepairType.FULL); - private AutoRepairParams automatedRepairIncremental = new AutoRepairParams(AutoRepairConfig.RepairType.INCREMENTAL); - private AutoRepairParams automatedRepairPreviewRepaired = new AutoRepairParams(AutoRepairConfig.RepairType.PREVIEW_REPAIRED); - + private AutoRepairParams autoRepair = AutoRepairParams.DEFAULT; public Builder() { } @@ -625,21 +600,9 @@ public Builder pendingDrop(boolean pendingDrop) return this; } - public Builder automatedRepairFull(AutoRepairParams val) - { - automatedRepairFull = val; - return this; - } - - public Builder automatedRepairIncremental(AutoRepairParams val) - { - automatedRepairIncremental = val; - return this; - } - - public Builder automatedRepairPreviewRepaired(AutoRepairParams val) + public Builder automatedRepair(AutoRepairParams val) { - automatedRepairPreviewRepaired = val; + autoRepair = val; return this; } } diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index c5e5e2e70ee2..9f27ee04a7fe 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1144,9 +1144,7 @@ private static String tableParametersCql() " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = '99p'\n" + - " AND repair_full = {'enabled': 'true'}\n" + - " AND repair_incremental = {'enabled': 'true'}\n" + - " AND repair_preview_repaired = {'enabled': 'true'};"; + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; } private static String cqlQuoted(Map map) @@ -1174,9 +1172,7 @@ private static String mvParametersCql() " AND 
min_index_interval = 128\n" + " AND read_repair = 'BLOCKING'\n" + " AND speculative_retry = '99p'\n" + - " AND repair_full = {'enabled': 'true'}\n" + - " AND repair_incremental = {'enabled': 'true'}\n" + - " AND repair_preview_repaired = {'enabled': 'true'};"; + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index 8a6e2d7fbf56..a328790cce9d 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -350,9 +350,7 @@ public void testCfmOptionsCQL() " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = 'ALWAYS'\n" + - " AND repair_full = {'enabled': 'true'}\n" + - " AND repair_incremental = {'enabled': 'true'}\n" + - " AND repair_preview_repaired = {'enabled': 'true'};" + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};" )); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index c07c7a5ae137..08196f425ad1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -129,7 +129,7 @@ public void setup() SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); - 
QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH repair_full = {'enabled': 'false'} AND repair_incremental = {'enabled': 'false'} AND repair_preview_repaired = {'enabled': 'false'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH auto_repair = {'full_enabled': 'false', 'incremental_enabled': 'false', 'preview_repaired_enabled': 'false', 'priority': '0'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); @@ -522,10 +522,10 @@ public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws @Test public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() { - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); - Assert.assertTrue(cfm.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.FULL).repairEnabled()); - Assert.assertFalse(cfmDisabledAutoRepair.params.automatedRepair.get(AutoRepairConfig.RepairType.INCREMENTAL).repairEnabled()); + Assert.assertTrue(cfm.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.FULL)); + Assert.assertTrue(cfm.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.INCREMENTAL)); + Assert.assertFalse(cfmDisabledAutoRepair.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.FULL)); + Assert.assertFalse(cfmDisabledAutoRepair.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.INCREMENTAL)); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setRepairMinInterval(repairType, "0s"); @@ 
-567,9 +567,7 @@ public void testTokenRangesNoSplit() @Test public void testTableAttribute() { - assertTrue(TableAttributes.validKeywords().contains("repair_full")); - assertTrue(TableAttributes.validKeywords().contains("repair_incremental")); - assertTrue(TableAttributes.validKeywords().contains("repair_preview_repaired")); + assertTrue(TableAttributes.validKeywords().contains("auto_repair")); } @Test @@ -578,9 +576,9 @@ public void testDefaultAutomatedRepair() for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), - cfm.params.automatedRepair.get(repairType).repairEnabled()); + cfm.params.autoRepair.repairEnabled(repairType)); Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), - cfmDisabledAutoRepair.params.automatedRepair.get(repairType).repairEnabled()); + cfmDisabledAutoRepair.params.autoRepair.repairEnabled(repairType)); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java index 011df9d1592e..7b95acb879de 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -37,9 +37,9 @@ public class PrioritizedRepairPlanTest extends CQLTester public void testBuildWithDifferentPriorities() { // Test reordering assignments with different priorities - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); - String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 
'true', 'priority': '1'}"); + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '3'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '1'}"); List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3); assertEquals(3, prioritizedRepairPlans.size()); @@ -59,9 +59,9 @@ public void testBuildWithDifferentPriorities() public void testBuildWithSamePriority() { // Test reordering assignments with the same priority - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); // Expect only 1 plan since all tables share the same priority List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3); @@ -82,17 +82,17 @@ public void testBuildWithSamePriority() public void testBuildWithMixedPriorities() { String ks1 = createKeyspace("CREATE KEYSPACE %s WITH replication={ 'class' : 
'SimpleStrategy', 'replication_factor' : 1 }"); - String table1 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table2 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '3'}"); - String table3 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '2'}"); - String table4 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + String table1 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table2 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '3'}"); + String table3 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table4 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '1'}"); // No priority table should be bucketed at priority 0 String table5 = createTable(ks1,"CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); // Create a new keyspace to ensure its tables get grouped with appropriate priority bucket String ks2 = createKeyspace("CREATE KEYSPACE %s WITH replication={ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"); String table6 = createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); - String table7 = createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '1'}"); + String table7 = createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '1'}"); Map> keyspaceToTableMap = new HashMap<>(); keyspaceToTableMap.put(ks1, Lists.newArrayList(table1, table2, table3, 
table4, table5)); @@ -149,7 +149,7 @@ public void testBuildWithEmptyTableList() public void testBuildWithOneTable() { // Test with a single element (should remain unchanged) - String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH repair_full = {'enabled': 'true', 'priority': '5'}"); + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '5'}"); // Expect only 1 plans List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1); From 48ab04389ff896fb581fa885306d9f711154f6cd Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 10 Dec 2024 12:25:59 -0800 Subject: [PATCH 092/257] Fix ant checkstyle errors --- .../statements/schema/TableAttributes.java | 1 - .../repair/autorepair/AutoRepairConfig.java | 3 ++- .../repair/autorepair/RepairAssignment.java | 1 - .../cassandra/schema/AutoRepairParams.java | 27 ++++++++++--------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 5d5f1ba903a9..9ec04f502bc6 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -27,7 +27,6 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.schema.AutoRepairParams; import org.apache.cassandra.schema.CachingParams; import org.apache.cassandra.schema.CompactionParams; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java 
index b84ee5f2f652..8eaef0b6ecc5 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -32,6 +32,7 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.ParameterizedClass; +import org.apache.cassandra.utils.LocalizeString; public class AutoRepairConfig implements Serializable { @@ -65,7 +66,7 @@ public enum RepairType implements Serializable RepairType() { - this.configName = name().toLowerCase(); + this.configName = LocalizeString.toLowerCaseLocalized(name()); } RepairType(String configName) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java index c06d58e71fa2..63f8fbed4426 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java @@ -21,7 +21,6 @@ import java.util.List; import java.util.Objects; -import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index 2b289a8c3b44..105b9f6d0ddd 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -28,6 +28,7 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.utils.LocalizeString; import static java.lang.String.format; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; @@ -51,9 +52,9 @@ public String toString() private ImmutableMap options; public static final Map DEFAULT_OPTIONS = ImmutableMap.of( - Option.FULL_ENABLED.name().toLowerCase(), Boolean.toString(true), 
- Option.INCREMENTAL_ENABLED.name().toLowerCase(), Boolean.toString(true), - Option.PREVIEW_REPAIRED_ENABLED.name().toLowerCase(), Boolean.toString(true), + LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.name()), Boolean.toString(true), + LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.name()), Boolean.toString(true), + LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.name()), Boolean.toString(true), Option.PRIORITY.toString(), "0" ); @@ -88,7 +89,7 @@ public static AutoRepairParams create(Map options) public boolean repairEnabled(AutoRepairConfig.RepairType type) { - String option = type.toString().toLowerCase() + "_enabled"; + String option = LocalizeString.toLowerCaseLocalized(type.toString()) + "_enabled"; String enabled = options.get(option); return enabled == null ? Boolean.parseBoolean(DEFAULT_OPTIONS.get(option)) @@ -107,33 +108,33 @@ public void validate() { for (Option option : Option.values()) { - if (!options.containsKey(option.toString().toLowerCase())) + if (!options.containsKey(LocalizeString.toLowerCaseLocalized(option.toString()))) { throw new ConfigurationException(format("Missing repair sub-option '%s'", option)); } } - if (options.get(Option.FULL_ENABLED.toString().toLowerCase()) != null && !isValidBoolean(options.get(Option.FULL_ENABLED.toString().toLowerCase()))) + if (options.get(LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.toString())) != null && !isValidBoolean(options.get(LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.toString())))) { throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", - options.get(Option.FULL_ENABLED.toString().toLowerCase()), + options.get(LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.toString())), Option.FULL_ENABLED)); } - if (options.get(Option.INCREMENTAL_ENABLED.toString().toLowerCase()) != null && !isValidBoolean(options.get(Option.INCREMENTAL_ENABLED.toString().toLowerCase()))) + if 
(options.get(LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.toString())) != null && !isValidBoolean(options.get(LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.toString())))) { throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", - options.get(Option.INCREMENTAL_ENABLED.toString().toLowerCase()), + options.get(LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.toString())), Option.INCREMENTAL_ENABLED)); } - if (options.get(Option.PREVIEW_REPAIRED_ENABLED.toString().toLowerCase()) != null && !isValidBoolean(options.get(Option.PREVIEW_REPAIRED_ENABLED.toString().toLowerCase()))) + if (options.get(LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.toString())) != null && !isValidBoolean(options.get(LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.toString())))) { throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", - options.get(Option.PREVIEW_REPAIRED_ENABLED.toString().toLowerCase()), + options.get(LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.toString())), Option.PREVIEW_REPAIRED_ENABLED)); } - if (options.get(Option.PRIORITY.toString().toLowerCase()) != null && !isValidInt(options.get(Option.PRIORITY.toString().toLowerCase()))) + if (options.get(LocalizeString.toLowerCaseLocalized(Option.PRIORITY.toString())) != null && !isValidInt(options.get(LocalizeString.toLowerCaseLocalized(Option.PRIORITY.toString())))) { throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be an integer", - options.get(Option.PRIORITY.toString().toLowerCase()), + options.get(LocalizeString.toLowerCaseLocalized(Option.PRIORITY.toString())), Option.PRIORITY)); } } From 4cc3fa2a86c56f52e4d872b9a0040cb8c65dc2dc Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 10 Dec 2024 14:17:51 -0800 Subject: [PATCH 093/257] Ignore keyspaces with 
MetaStrategy --- .../apache/cassandra/repair/autorepair/AutoRepairUtils.java | 3 ++- .../repair/autorepair/AutoRepairParameterizedTest.java | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 79bf638eb7d9..df6627507771 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -55,6 +55,7 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.AbstractReplicationStrategy; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.MetaStrategy; import org.apache.cassandra.locator.NetworkTopologyStrategy; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; @@ -802,7 +803,7 @@ public static boolean checkNodeContainsKeyspaceReplica(Keyspace ks) ksReplicaOnNode = false; } } - if (replicationStrategy instanceof LocalStrategy) + if (replicationStrategy instanceof LocalStrategy || replicationStrategy instanceof MetaStrategy) { ksReplicaOnNode = false; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 08196f425ad1..70c4b3071b0c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -650,7 +650,7 @@ public void testRepairMaxRetries() // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, // system_distributed.auto_repair_priority,system_distributed.repair_history,system_distributed.auto_repair_history, // 
system_distributed.view_build_status,system_distributed.parent_repair_history,system_distributed.partition_denylist - int exptedTablesGoingThroughRepair = 18; + int exptedTablesGoingThroughRepair = 17; assertEquals(config.getRepairMaxRetries()*exptedTablesGoingThroughRepair, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); From 6d5d648c325477f1e279db82483df933cc417ed1 Mon Sep 17 00:00:00 2001 From: Jaydeepkumar Chovatia Date: Tue, 10 Dec 2024 14:44:18 -0800 Subject: [PATCH 094/257] Adjust BigTableScanner.getScanner as per 5.1 API set --- .../apache/cassandra/repair/autorepair/RepairRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index 895cde20542d..ecae4dc2c2a7 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -577,7 +577,7 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai // get the bounds of the sstable for this range using the index file but do not actually read it. List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - ISSTableScanner rangeScanner = BigTableScanner.getScanner((BigTableReader) reader, Collections.singleton(tokenRange)); + ISSTableScanner rangeScanner = reader.getScanner(Collections.singleton(tokenRange)); // Type check scanner returned as it may be an EmptySSTableScanner if the range is not covered in the // SSTable, in this case we will avoid incrementing approxBytesInRange. 
if (rangeScanner instanceof BigTableScanner) From f18b98349a9fe6af9fd316add1640866b3b10bb3 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 10 Dec 2024 15:56:06 -0800 Subject: [PATCH 095/257] Fix antcheckstyle --- .../apache/cassandra/repair/autorepair/RepairRangeSplitter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java index ecae4dc2c2a7..fab0f07fb195 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java @@ -50,7 +50,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.io.sstable.format.big.BigTableScanner; import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; import org.apache.cassandra.io.sstable.metadata.MetadataType; From 0036bff0e4976747cc9c0efeb4218bc65be234da Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 11 Dec 2024 20:30:24 -0800 Subject: [PATCH 096/257] Fix ant test-latest failures for RepairRangeSplitterTest-latest_jdk17 Tests run: 15, Failures: 7 --- .../cassandra/repair/autorepair/RepairRangeSplitterTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java index 9baaecca545e..d93a40b689f9 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java @@ -41,6 +41,7 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import 
org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.sstable.format.big.BigFormat; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.FilteredRepairAssignments; import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizeEstimate; @@ -67,6 +68,7 @@ public static void setUpClass() AutoRepairService.setup(); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); + DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(BigFormat.NAME)); } @Before @@ -74,6 +76,7 @@ public void setUp() { repairRangeSplitter = new RepairRangeSplitter(Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + assertTrue(BigFormat.isSelected()); } @Test From 32ae5eaee5813a20c069ddf9cd6b76acd418b078 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 15 Dec 2024 10:54:20 -0800 Subject: [PATCH 097/257] utest: calculate expected tables dynamically instead of a fixed value --- .../AutoRepairParameterizedTest.java | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 70c4b3071b0c..2dc7dff32493 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -37,6 +37,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; +import 
org.apache.cassandra.locator.LocalStrategy; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; @@ -101,6 +102,9 @@ public class AutoRepairParameterizedTest extends CQLTester RepairCoordinator repairRunnable; private static AutoRepairConfig defaultConfig; + // Expected number of tables that should be repaired. + private static int expectedTablesGoingThroughRepair; + @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -121,6 +125,22 @@ public static void setupClass() throws Exception AutoRepairUtils.setup(); StorageService.instance.doAutoRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); + + // Calculate the expected number of tables to be repaired, this should be all system keyspaces that are + // distributed, plus 1 for the table we created (ks.tbl), excluding the 'mv' materialized view and + // 'tbl_disabled_auto_repair' we created. + expectedTablesGoingThroughRepair = 0; + for (Keyspace keyspace : Keyspace.all()) + { + // skip LocalStrategy keyspaces as these aren't repaired. + if (keyspace.getReplicationStrategy() instanceof LocalStrategy) + { + continue; + } + + int expectedTables = keyspace.getName().equals("ks") ? 
1 : keyspace.getColumnFamilyStores().size(); + expectedTablesGoingThroughRepair += expectedTables; + } } @Before @@ -646,15 +666,10 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); - //system_auth.role_permissions,system_auth.network_permissions,system_auth.role_members,system_auth.roles, - // system_auth.resource_role_permissons_index,system_traces.sessions,system_traces.events,ks.tbl, - // system_distributed.auto_repair_priority,system_distributed.repair_history,system_distributed.auto_repair_history, - // system_distributed.view_build_status,system_distributed.parent_repair_history,system_distributed.partition_denylist - int exptedTablesGoingThroughRepair = 17; - assertEquals(config.getRepairMaxRetries()*exptedTablesGoingThroughRepair, sleepCalls.get()); + assertEquals(config.getRepairMaxRetries()*expectedTablesGoingThroughRepair, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(exptedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(expectedTablesGoingThroughRepair); } @Test @@ -680,7 +695,7 @@ public void testRepairSuccessAfterRetry() AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(18); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } From 939c96214424520bcda6c0f70a74e2b4d6509ed1 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 17 Dec 2024 20:16:54 -0800 Subject: [PATCH 098/257] Ignore repair for system_traces keyspace --- .../cassandra/repair/autorepair/AutoRepair.java | 2 +- 
.../repair/autorepair/AutoRepairUtils.java | 16 +++++++++++----- .../autorepair/AutoRepairParameterizedTest.java | 6 ++++++ .../repair/autorepair/AutoRepairTest.java | 4 ++-- .../repair/autorepair/AutoRepairUtilsTest.java | 12 +++++++++--- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index eafd40eaddf7..d6d58746f45f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -216,7 +216,7 @@ public void repair(AutoRepairConfig.RepairType repairType) Map> keyspacesAndTablesToRepair = new LinkedHashMap<>(); for (Keyspace keyspace : keyspaces) { - if (!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace)) + if (!AutoRepairUtils.shouldConsiderKeyspace(keyspace)) { continue; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index df6627507771..635abea37177 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -790,24 +790,30 @@ public static Set getPriorityHosts(RepairType repairType) return hosts; } - public static boolean checkNodeContainsKeyspaceReplica(Keyspace ks) + public static boolean shouldConsiderKeyspace(Keyspace ks) { AbstractReplicationStrategy replicationStrategy = ks.getReplicationStrategy(); - boolean ksReplicaOnNode = true; + boolean repair = true; if (replicationStrategy instanceof NetworkTopologyStrategy) { Set datacenters = ((NetworkTopologyStrategy) replicationStrategy).getDatacenters(); String localDC = DatabaseDescriptor.getLocator().location(FBUtilities.getBroadcastAddressAndPort()).datacenter; if (!datacenters.contains(localDC)) { - ksReplicaOnNode = false; + repair = false; } } if 
(replicationStrategy instanceof LocalStrategy || replicationStrategy instanceof MetaStrategy) { - ksReplicaOnNode = false; + repair = false; } - return ksReplicaOnNode; + if (ks.getName().equalsIgnoreCase(SchemaConstants.TRACE_KEYSPACE_NAME)) + { + // by default, ignore the tables under system_traces as they do not have + // that much important data + repair = false; + } + return repair; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 2dc7dff32493..af765c377358 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -62,6 +62,7 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.LocalizeString; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; @@ -137,6 +138,11 @@ public static void setupClass() throws Exception { continue; } + // skip system_traces keyspaces + if (keyspace.getName().equalsIgnoreCase(SchemaConstants.TRACE_KEYSPACE_NAME)) + { + continue; + } int expectedTables = keyspace.getName().equals("ks") ? 
1 : keyspace.getColumnFamilyStores().size(); expectedTablesGoingThroughRepair += expectedTables; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 425dc9b31f37..283527f61117 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -141,14 +141,14 @@ public void testCheckNTSreplicationNodeInsideOutsideDC() // case 1 : // node reside in "datacenter1" // keyspace has replica in "datacenter1" - Assert.assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + Assert.assertTrue(AutoRepairUtils.shouldConsiderKeyspace(ks)); } else if (ks.getName().equals(ksname2)) { // case 2 : // node reside in "datacenter1" // keyspace has replica in "datacenter2" - Assert.assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + Assert.assertFalse(AutoRepairUtils.shouldConsiderKeyspace(ks)); } } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 9da8d1d87bb2..07c7ec004795 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -425,7 +425,7 @@ public void testCheckNodeContainsKeyspaceReplica() { Keyspace ks = Keyspace.open("ks"); - assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(ks)); + assertTrue(AutoRepairUtils.shouldConsiderKeyspace(ks)); } @Test @@ -474,8 +474,8 @@ public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() @Test public void testLocalStrategyAndNetworkKeyspace() { - assertFalse(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open("system"))); - assertTrue(AutoRepairUtils.checkNodeContainsKeyspaceReplica(Keyspace.open(KEYSPACE))); + 
assertFalse(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open("system"))); + assertTrue(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open(KEYSPACE))); } @Test @@ -497,4 +497,10 @@ public void testGetLastRepairTimeForNodeWhenHistoryIsEmpty() assertEquals(0, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); } + + @Test + public void tesSkipSystemTraces() + { + assertFalse(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open(SchemaConstants.TRACE_KEYSPACE_NAME))); + } } From 48d3ee66a8dffdba762427332fba68a9f9b9ab31 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 19 Dec 2024 09:59:36 -0800 Subject: [PATCH 099/257] CR from Andy T --- .../repair/autorepair/AutoRepairParameterizedTest.java | 1 - .../apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index af765c377358..4e12fc497210 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -62,7 +62,6 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.LocalizeString; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 07c7ec004795..d80b26a7fc31 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -499,7 +499,7 @@ public void testGetLastRepairTimeForNodeWhenHistoryIsEmpty() } 
@Test - public void tesSkipSystemTraces() + public void testSkipSystemTraces() { assertFalse(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open(SchemaConstants.TRACE_KEYSPACE_NAME))); } From f68838ce8dc44b4f9510ff0956d84e071f13025e Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 18 Dec 2024 20:35:17 -0800 Subject: [PATCH 100/257] Fix diff40 --- test/data/jmxdump/cassandra-4.0-jmx.yaml | 768 ++++++++++++++++++++++- 1 file changed, 740 insertions(+), 28 deletions(-) diff --git a/test/data/jmxdump/cassandra-4.0-jmx.yaml b/test/data/jmxdump/cassandra-4.0-jmx.yaml index e0d01272c83a..43fe3a03fbed 100644 --- a/test/data/jmxdump/cassandra-4.0-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.0-jmx.yaml @@ -10190,6 +10190,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -10204,6 +10209,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11067,6 +11077,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11081,6 +11096,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11944,6 +11964,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11958,6 +11983,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12821,6 +12851,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12835,6 +12870,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13698,6 +13738,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: 
read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13712,6 +13757,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14575,6 +14625,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14589,6 +14644,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: 
read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15452,6 +15512,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15466,6 +15531,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16329,6 +16399,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: 
double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16343,6 +16418,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17206,6 +17286,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17220,6 +17305,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18083,6 
+18173,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18097,6 +18192,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18960,6 +19060,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18974,6 +19079,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19837,6 +19947,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19851,6 +19966,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20714,6 +20834,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: 
read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20728,6 +20853,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21591,6 +21721,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21605,6 +21740,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: 
FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22468,6 +22608,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22482,6 +22627,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23345,6 +23495,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, 
type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23359,6 +23514,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24222,6 +24382,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24236,6 +24401,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName 
parameters: [] @@ -25099,6 +25269,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25113,6 +25288,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25976,6 +26156,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25990,6 +26175,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -26853,6 +27043,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -27730,6 +27925,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=networ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29484,6 +29684,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29498,6 +29703,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30361,6 +30571,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30375,6 +30590,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31238,6 +31458,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31252,6 +31477,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -32992,6 +33222,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -33869,6 +34104,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34746,6 +34986,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34760,6 +35005,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, 
type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35623,6 +35873,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35637,6 +35892,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -36500,6 +36760,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=drop org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: 
double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37377,6 +37642,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37391,6 +37661,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -38254,6 +38529,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -38268,6 +38548,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39131,6 +39416,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39145,6 +39435,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40008,6 +40303,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40022,6 +40322,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40885,6 +41190,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40899,6 +41209,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41762,6 +42077,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41776,6 +42096,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42639,6 +42964,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - 
{access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42653,6 +42983,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43516,6 +43851,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43530,6 +43870,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: 
read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44393,6 +44738,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44407,6 +44757,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -45267,13 +45622,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=ColumnFamily,name=BytesAnticompacted: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName 
org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -45281,13 +45629,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=ColumnFamily,name=BytesMutatedAnticompaction: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -57152,6 +57493,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Blo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -57166,6 +57512,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Byt org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58233,6 +58584,11 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58247,6 +58603,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59314,6 +59675,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59328,6 +59694,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60395,6 +60766,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60409,6 +60785,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61476,6 +61857,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: 
long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61490,6 +61876,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62557,6 +62948,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62571,6 +62967,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - 
{access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -63638,6 +64039,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -63652,6 +64058,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64719,6 +65130,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -64733,6 +65149,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -65800,6 +66221,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -65814,6 +66240,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66881,6 +67312,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66895,6 +67331,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67962,6 +68403,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67976,6 +68422,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + 
- {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69043,6 +69494,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Bloo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69057,6 +69513,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Byte org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70124,6 +70585,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: 
MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70138,6 +70604,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71205,6 +71676,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71219,6 +71695,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -72286,6 +72767,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -72300,6 +72786,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73367,6 +73858,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73381,6 +73877,11 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74448,6 +74949,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74462,6 +74968,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -75529,6 +76040,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -75543,6 +76059,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76610,6 +77131,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76624,6 +77150,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - 
{access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77691,6 +78222,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77705,6 +78241,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78772,6 +79313,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: 
read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78786,6 +79332,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -79853,6 +80404,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role_permissons_index,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -80934,6 +81490,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: 
read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -80948,6 +81509,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82015,6 +82581,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82029,6 +82600,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - 
name: objectName parameters: [] @@ -83096,6 +83672,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=Bl org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -83110,6 +83691,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=By org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -84177,6 +84763,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent_repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85258,6 +85849,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair 
org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85272,6 +85868,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -86339,6 +86940,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_b org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -87420,6 +88026,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -87434,6 +88045,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88501,6 +89117,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88515,6 +89136,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: 
read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89582,6 +90208,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89596,6 +90227,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90663,6 +91299,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: 
OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90677,6 +91318,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91744,6 +92390,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91758,6 +92409,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ 
-92825,6 +93481,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92839,6 +93500,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93906,6 +94572,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93920,6 +94591,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -94987,6 +95663,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -95001,6 +95682,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96068,6 +96754,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: 
Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96082,6 +96773,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97149,6 +97845,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97163,6 +97864,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - 
{access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98230,6 +98936,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98244,6 +98955,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -99311,6 +100027,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, 
type: java.lang.String} operations: - name: objectName parameters: [] @@ -99325,6 +100046,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -100389,13 +101115,6 @@ org.apache.cassandra.metrics:type=Table,name=BloomFilterOffHeapMemoryUsed: - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=Table,name=BytesAnticompacted: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -100403,13 +101122,6 @@ org.apache.cassandra.metrics:type=Table,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=Table,name=BytesMutatedAnticompaction: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} From 3ce847aaa53e4cb3eff621d2d2b1b66718c630bc Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 18 Dec 2024 20:44:57 -0800 Subject: [PATCH 101/257] Fix diff41 --- test/data/jmxdump/cassandra-4.1-jmx.yaml | 768 
++++++++++++++++++++++- 1 file changed, 740 insertions(+), 28 deletions(-) diff --git a/test/data/jmxdump/cassandra-4.1-jmx.yaml b/test/data/jmxdump/cassandra-4.1-jmx.yaml index a5ea2a74a16b..1e85c6d48fcd 100644 --- a/test/data/jmxdump/cassandra-4.1-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.1-jmx.yaml @@ -10190,6 +10190,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -10204,6 +10209,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11067,6 +11077,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + 
- {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11081,6 +11096,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11944,6 +11964,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11958,6 +11983,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -12821,6 +12851,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12835,6 +12870,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13698,6 +13738,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13712,6 +13757,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14575,6 +14625,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14589,6 +14644,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15452,6 +15512,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15466,6 +15531,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16329,6 +16399,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16343,6 +16418,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, 
type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17206,6 +17286,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17220,6 +17305,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18083,6 +18173,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + 
- {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18097,6 +18192,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18960,6 +19060,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18974,6 +19079,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: 
RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19837,6 +19947,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19851,6 +19966,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20714,6 +20834,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20728,6 +20853,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21591,6 +21721,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21605,6 +21740,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22468,6 +22608,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22482,6 +22627,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23345,6 +23495,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23359,6 +23514,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity_v2,name=BytesMutatedAnticompaction: 
attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24222,6 +24382,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24236,6 +24401,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25099,6 +25269,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - 
{access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25113,6 +25288,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25976,6 +26156,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25990,6 +26175,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: 
double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -26853,6 +27043,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -27730,6 +27925,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=networ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29484,6 +29684,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -29498,6 +29703,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30361,6 +30571,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30375,6 +30590,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31238,6 +31458,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31252,6 +31477,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -32992,6 +33222,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -33869,6 +34104,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34746,6 +34986,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34760,6 +35005,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35623,6 +35873,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35637,6 +35892,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -36500,6 +36760,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=drop org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37377,6 +37642,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} 
+ - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37391,6 +37661,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -38254,6 +38529,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -38268,6 +38548,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - 
{access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39131,6 +39416,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39145,6 +39435,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40008,6 +40303,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -40022,6 +40322,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40885,6 +41190,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40899,6 +41209,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41762,6 +42077,11 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41776,6 +42096,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42639,6 +42964,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42653,6 +42983,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43516,6 +43851,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43530,6 +43870,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44393,6 +44738,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - 
{access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44407,6 +44757,11 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -45267,13 +45622,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=ColumnFamily,name=BytesAnticompacted: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -45281,13 +45629,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=ColumnFamily,name=BytesMutatedAnticompaction: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName 
org.apache.cassandra.metrics:type=ColumnFamily,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -57152,6 +57493,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Blo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -57166,6 +57512,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Byt org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58233,6 +58584,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58247,6 
+58603,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59314,6 +59675,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59328,6 +59694,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60395,6 +60766,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bloom 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60409,6 +60785,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61476,6 +61857,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61490,6 +61876,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - 
{access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62557,6 +62948,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62571,6 +62967,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -63638,6 +64039,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: 
MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -63652,6 +64058,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64719,6 +65130,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64733,6 +65149,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: 
objectName parameters: [] @@ -65800,6 +66221,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -65814,6 +66240,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66881,6 +67312,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66895,6 +67331,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67962,6 +68403,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67976,6 +68422,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69043,6 +69494,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Bloo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - 
{access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69057,6 +69513,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Byte org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70124,6 +70585,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70138,6 +70604,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: 
read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71205,6 +71676,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71219,6 +71695,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -72286,6 +72767,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -72300,6 +72786,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73367,6 +73858,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73381,6 +73877,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74448,6 +74949,11 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74462,6 +74968,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -75529,6 +76040,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -75543,6 +76059,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76610,6 +77131,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76624,6 +77150,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77691,6 +78222,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - 
{access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77705,6 +78241,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78772,6 +79313,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78786,6 +79332,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - 
{access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -79853,6 +80404,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role_permissons_index,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -80934,6 +81490,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -80948,6 +81509,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: 
read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82015,6 +82581,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82029,6 +82600,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -83096,6 +83672,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=Bl org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName 
parameters: [] @@ -83110,6 +83691,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=By org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -84177,6 +84763,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent_repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85258,6 +85849,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85272,6 +85868,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair 
org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -86339,6 +86940,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_b org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -87420,6 +88026,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -87434,6 +88045,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: 
attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88501,6 +89117,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88515,6 +89136,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89582,6 +90208,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: 
read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89596,6 +90227,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90663,6 +91299,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90677,6 +91318,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: 
OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91744,6 +92390,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91758,6 +92409,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92825,6 +93481,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92839,6 
+93500,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93906,6 +94572,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93920,6 +94591,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -94987,6 +95663,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -95001,6 +95682,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96068,6 +96754,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96082,6 +96773,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: 
Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97149,6 +97845,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97163,6 +97864,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98230,6 +98936,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: 
read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98244,6 +98955,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -99311,6 +100027,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -99325,6 +100046,11 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, 
type: java.lang.String} operations: - name: objectName parameters: [] @@ -100389,13 +101115,6 @@ org.apache.cassandra.metrics:type=Table,name=BloomFilterOffHeapMemoryUsed: - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=Table,name=BytesAnticompacted: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -100403,13 +101122,6 @@ org.apache.cassandra.metrics:type=Table,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName -org.apache.cassandra.metrics:type=Table,name=BytesMutatedAnticompaction: - attributes: - - {access: read-only, name: Value, type: java.lang.Object} - operations: - - name: objectName - parameters: [] - returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} From 7e5806672939033739711053b44e4bf4ea9b11ca Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 20 Dec 2024 11:10:38 -0800 Subject: [PATCH 102/257] Convert the metrics to TableMeter --- .../cassandra/metrics/KeyspaceMetrics.java | 10 ++++++ .../cassandra/metrics/TableMetrics.java | 31 +++++++++++-------- test/data/jmxdump/cassandra-4.0-jmx.yaml | 24 ++++++++++++++ test/data/jmxdump/cassandra-4.1-jmx.yaml | 24 ++++++++++++++ 4 files changed, 76 insertions(+), 13 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index 209c5a7a8ede..e205230bd2ec 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -190,6 +190,11 @@ public class KeyspaceMetrics public 
final Meter tooManySSTableIndexesReadWarnings; public final Meter tooManySSTableIndexesReadAborts; + public final Meter bytesAnticompacted; + public final Meter bytesMutatedAnticompaction; + public final Meter bytesPreviewed; + public final Meter tokenRangesPreviewedDesynchronized; + public final Meter bytesPreviewedDesynchronized; public final LatencyMetrics viewSSTableIntervalTree; @@ -309,6 +314,11 @@ public KeyspaceMetrics(final Keyspace ks) outOfRangeTokenPaxosRequests = createKeyspaceCounter("PaxosOutOfRangeToken"); viewSSTableIntervalTree = createLatencyMetrics("ViewSSTableIntervalTree"); + bytesAnticompacted = createKeyspaceMeter("BytesAnticompacted"); + bytesMutatedAnticompaction = createKeyspaceMeter("BytesMutatedAnticompaction"); + bytesPreviewed = createKeyspaceMeter("BytesPreviewed"); + tokenRangesPreviewedDesynchronized = createKeyspaceMeter("TokenRangesPreviewedDesynchronized"); + bytesPreviewedDesynchronized = createKeyspaceMeter("BytesPreviewedDesynchronized"); } /** diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 3aa478fbbb05..29a494067d96 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -219,15 +219,15 @@ public class TableMetrics /** number of partitions read creating merkle trees */ public final TableHistogram partitionsValidated; /** number of bytes read while doing anticompaction */ - public final Meter bytesAnticompacted; + public final TableMeter bytesAnticompacted; /** number of bytes where the whole sstable was contained in a repairing range so that we only mutated the repair status */ - public final Meter bytesMutatedAnticompaction; + public final TableMeter bytesMutatedAnticompaction; /** number of bytes that were scanned during preview repair */ - public final Meter bytesPreviewed; + public final TableMeter bytesPreviewed; /** number of desynchronized token ranges that 
were detected during preview repair */ - public final Meter tokenRangesPreviewedDesynchronized; + public final TableMeter tokenRangesPreviewedDesynchronized; /** number of desynchronized bytes that were detected during preview repair */ - public final Meter bytesPreviewedDesynchronized; + public final TableMeter bytesPreviewedDesynchronized; /** ratio of how much we anticompact vs how much we could mutate the repair status*/ public final Gauge mutatedAnticompactionGauge; @@ -837,15 +837,15 @@ public Long getValue() bytesValidated = createTableHistogram("BytesValidated", cfs.keyspace.metric.bytesValidated, false); partitionsValidated = createTableHistogram("PartitionsValidated", cfs.keyspace.metric.partitionsValidated, false); - bytesAnticompacted = createTableMeter("BytesAnticompacted"); - bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction"); - bytesPreviewed = createTableMeter("BytesPreviewed"); - tokenRangesPreviewedDesynchronized = createTableMeter("TokenRangesPreviewedDesynchronized"); - bytesPreviewedDesynchronized = createTableMeter("BytesPreviewedDesynchronized"); + bytesAnticompacted = createTableMeter("BytesAnticompacted", cfs.keyspace.metric.bytesAnticompacted); + bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction", cfs.keyspace.metric.bytesMutatedAnticompaction); + bytesPreviewed = createTableMeter("BytesPreviewed", cfs.keyspace.metric.bytesPreviewed); + tokenRangesPreviewedDesynchronized = createTableMeter("TokenRangesPreviewedDesynchronized", cfs.keyspace.metric.tokenRangesPreviewedDesynchronized); + bytesPreviewedDesynchronized = createTableMeter("BytesPreviewedDesynchronized", cfs.keyspace.metric.bytesPreviewedDesynchronized); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> { - double bytesMutated = bytesMutatedAnticompaction.getCount(); - double bytesAnticomp = bytesAnticompacted.getCount(); + double bytesMutated = bytesMutatedAnticompaction.table.getCount(); + double 
bytesAnticomp = bytesAnticompacted.table.getCount(); if (bytesAnticomp + bytesMutated > 0) return bytesMutated / (bytesAnticomp + bytesMutated); return 0.0; @@ -1179,10 +1179,15 @@ private TableMeter(Meter table, Meter keyspace, Meter global) } public void mark() + { + mark(1L); + } + + public void mark(long val) { for (Meter meter : all) { - meter.mark(); + meter.mark(val); } } } diff --git a/test/data/jmxdump/cassandra-4.0-jmx.yaml b/test/data/jmxdump/cassandra-4.0-jmx.yaml index 43fe3a03fbed..5490ffcd4170 100644 --- a/test/data/jmxdump/cassandra-4.0-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.0-jmx.yaml @@ -101115,6 +101115,18 @@ org.apache.cassandra.metrics:type=Table,name=BloomFilterOffHeapMemoryUsed: - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=Table,name=BytesAnticompacted: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -101122,6 +101134,18 @@ org.apache.cassandra.metrics:type=Table,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=Table,name=BytesMutatedAnticompaction: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: 
read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} diff --git a/test/data/jmxdump/cassandra-4.1-jmx.yaml b/test/data/jmxdump/cassandra-4.1-jmx.yaml index 1e85c6d48fcd..8623da7bf414 100644 --- a/test/data/jmxdump/cassandra-4.1-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.1-jmx.yaml @@ -101115,6 +101115,18 @@ org.apache.cassandra.metrics:type=Table,name=BloomFilterOffHeapMemoryUsed: - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=Table,name=BytesAnticompacted: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -101122,6 +101134,18 @@ org.apache.cassandra.metrics:type=Table,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=Table,name=BytesMutatedAnticompaction: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + 
returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} From 8d53dbde6716a69a5ad9c09574a0aae9d9734c6a Mon Sep 17 00:00:00 2001 From: Jaydeepkumar Chovatia Date: Fri, 20 Dec 2024 11:15:40 -0800 Subject: [PATCH 103/257] Add a global type=ColumnFamily --- test/data/jmxdump/cassandra-4.0-jmx.yaml | 24 ++++++++++++++++++++++++ test/data/jmxdump/cassandra-4.1-jmx.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/test/data/jmxdump/cassandra-4.0-jmx.yaml b/test/data/jmxdump/cassandra-4.0-jmx.yaml index 5490ffcd4170..e3c86eb84365 100644 --- a/test/data/jmxdump/cassandra-4.0-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.0-jmx.yaml @@ -45622,6 +45622,18 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=ColumnFamily,name=BytesAnticompacted: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -45629,6 +45641,18 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=ColumnFamily,name=BytesMutatedAnticompaction: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} 
+ - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} diff --git a/test/data/jmxdump/cassandra-4.1-jmx.yaml b/test/data/jmxdump/cassandra-4.1-jmx.yaml index 8623da7bf414..e1c141989ec2 100644 --- a/test/data/jmxdump/cassandra-4.1-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.1-jmx.yaml @@ -45622,6 +45622,18 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=ColumnFamily,name=BytesAnticompacted: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: attributes: - {access: read-only, name: Value, type: java.lang.Object} @@ -45629,6 +45641,18 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: - name: objectName parameters: [] returnType: javax.management.ObjectName +org.apache.cassandra.metrics:type=ColumnFamily,name=BytesMutatedAnticompaction: + attributes: + - {access: read-only, name: Count, type: long} + - {access: read-only, name: FifteenMinuteRate, type: double} + - {access: read-only, name: FiveMinuteRate, type: double} + - {access: 
read-only, name: MeanRate, type: double} + - {access: read-only, name: OneMinuteRate, type: double} + - {access: read-only, name: RateUnit, type: java.lang.String} + operations: + - name: objectName + parameters: [] + returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesPendingRepair: attributes: - {access: read-only, name: Value, type: java.lang.Object} From a2fa4698f05a895edaba7db87200c9c5f34585e0 Mon Sep 17 00:00:00 2001 From: Jaydeepkumar Chovatia Date: Fri, 20 Dec 2024 11:18:21 -0800 Subject: [PATCH 104/257] Remove an extra empty line From b4040e5c377170379f180cd5bb224755adbbab46 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 20 Dec 2024 14:22:29 -0800 Subject: [PATCH 105/257] Revert the cassandra-4.x.xml changes --- test/data/jmxdump/cassandra-4.0-jmx.yaml | 768 +---------------------- test/data/jmxdump/cassandra-4.1-jmx.yaml | 768 +---------------------- 2 files changed, 8 insertions(+), 1528 deletions(-) diff --git a/test/data/jmxdump/cassandra-4.0-jmx.yaml b/test/data/jmxdump/cassandra-4.0-jmx.yaml index e3c86eb84365..e0d01272c83a 100644 --- a/test/data/jmxdump/cassandra-4.0-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.0-jmx.yaml @@ -10190,11 +10190,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -10209,11 +10204,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11077,11 +11067,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11096,11 +11081,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11964,11 +11944,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11983,11 +11958,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12851,11 +12821,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12870,11 +12835,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - 
{access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13738,11 +13698,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13757,11 +13712,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14625,11 +14575,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, 
name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14644,11 +14589,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15512,11 +15452,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15531,11 +15466,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName 
parameters: [] @@ -16399,11 +16329,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16418,11 +16343,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17286,11 +17206,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17305,11 +17220,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18173,11 +18083,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18192,11 +18097,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19060,11 +18960,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: 
read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19079,11 +18974,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19947,11 +19837,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19966,11 +19851,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, 
type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20834,11 +20714,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20853,11 +20728,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21721,11 +21591,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - 
- {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21740,11 +21605,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22608,11 +22468,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22627,11 +22482,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23495,11 +23345,6 
@@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23514,11 +23359,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24382,11 +24222,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24401,11 +24236,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25269,11 +25099,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25288,11 +25113,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -26156,11 +25976,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -26175,11 +25990,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -27043,11 +26853,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -27925,11 +27730,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=networ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29684,11 +29484,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29703,11 +29498,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30571,11 +30361,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30590,11 +30375,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31458,11 +31238,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31477,11 +31252,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - 
- {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -33222,11 +32992,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34104,11 +33869,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34986,11 +34746,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: 
double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35005,11 +34760,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35873,11 +35623,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35892,11 +35637,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -36760,11 +36500,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=drop org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37642,11 +37377,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37661,11 +37391,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -38529,11 +38254,6 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -38548,11 +38268,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39416,11 +39131,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39435,11 +39145,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40303,11 +40008,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40322,11 +40022,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41190,11 +40885,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - 
{access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41209,11 +40899,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42077,11 +41762,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42096,11 +41776,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: 
read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42964,11 +42639,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42983,11 +42653,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43851,11 +43516,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: 
OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43870,11 +43530,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44738,11 +44393,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44757,11 +44407,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: 
objectName parameters: [] @@ -45624,12 +45269,7 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesAnticompacted: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] @@ -45643,12 +45283,7 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesMutatedAnticompaction: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] @@ -57517,11 +57152,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Blo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] 
@@ -57536,11 +57166,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Byt org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58608,11 +58233,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58627,11 +58247,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59699,11 +59314,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59718,11 +59328,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60790,11 +60395,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60809,11 +60409,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, 
type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61881,11 +61476,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61900,11 +61490,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62972,11 +62557,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: 
read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62991,11 +62571,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64063,11 +63638,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64082,11 +63652,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -65154,11 +64719,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -65173,11 +64733,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66245,11 +65800,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66264,11 +65814,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67336,11 +66881,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67355,11 +66895,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -68427,11 +67962,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: 
long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -68446,11 +67976,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69518,11 +69043,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Bloo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69537,11 +69057,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Byte org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: 
MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70609,11 +70124,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70628,11 +70138,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71700,11 +71205,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -71719,11 +71219,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -72791,11 +72286,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -72810,11 +72300,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73882,11 +73367,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,n 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73901,11 +73381,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74973,11 +74448,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74992,11 +74462,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, 
name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76064,11 +75529,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76083,11 +75543,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77155,11 +76610,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, 
type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77174,11 +76624,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78246,11 +77691,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78265,11 +77705,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: 
double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -79337,11 +78772,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -79356,11 +78786,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -80428,11 +79853,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role_permissons_index,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName 
parameters: [] @@ -81514,11 +80934,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -81533,11 +80948,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82605,11 +82015,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82624,11 +82029,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi 
org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -83696,11 +83096,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=Bl org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -83715,11 +83110,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=By org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -84787,11 +84177,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent_repair_history,name=BytesAnticompacted: attributes: - {access: 
read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85873,11 +85258,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85892,11 +85272,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -86964,11 +86339,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_b org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: 
read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88050,11 +87420,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88069,11 +87434,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89141,11 +88501,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, 
type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89160,11 +88515,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90232,11 +89582,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90251,11 +89596,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ 
-91323,11 +90663,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91342,11 +90677,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92414,11 +91744,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92433,11 +91758,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93505,11 +92825,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93524,11 +92839,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -94596,11 +93906,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: 
Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -94615,11 +93920,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -95687,11 +94987,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -95706,11 +95001,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - 
- {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96778,11 +96068,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96797,11 +96082,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97869,11 +97149,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -97888,11 +97163,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98960,11 +98230,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98979,11 +98244,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -100051,11 +99311,6 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -100070,11 +99325,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -101141,12 +100391,7 @@ org.apache.cassandra.metrics:type=Table,name=BloomFilterOffHeapMemoryUsed: returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesAnticompacted: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] @@ -101160,12 +100405,7 @@ org.apache.cassandra.metrics:type=Table,name=BytesFlushed: returnType: 
javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesMutatedAnticompaction: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] diff --git a/test/data/jmxdump/cassandra-4.1-jmx.yaml b/test/data/jmxdump/cassandra-4.1-jmx.yaml index e1c141989ec2..a5ea2a74a16b 100644 --- a/test/data/jmxdump/cassandra-4.1-jmx.yaml +++ b/test/data/jmxdump/cassandra-4.1-jmx.yaml @@ -10190,11 +10190,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -10209,11 +10204,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,n org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -11077,11 +11067,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11096,11 +11081,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11964,11 +11944,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -11983,11 +11958,6 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_r org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12851,11 +12821,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -12870,11 +12835,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13738,11 +13698,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -13757,11 +13712,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14625,11 +14575,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -14644,11 +14589,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: 
- {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15512,11 +15452,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -15531,11 +15466,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16399,11 +16329,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, 
type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -16418,11 +16343,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17286,11 +17206,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -17305,11 +17220,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - 
{access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18173,11 +18083,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -18192,11 +18097,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19060,11 +18960,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19079,11 +18974,6 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name= org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19947,11 +19837,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -19966,11 +19851,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,na org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20834,11 +20714,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -20853,11 +20728,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_st org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21721,11 +21591,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -21740,11 +21605,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,nam org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - 
{access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22608,11 +22468,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -22627,11 +22482,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estima org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23495,11 +23345,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: 
read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -23514,11 +23359,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_act org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=sstable_activity_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24382,11 +24222,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -24401,11 +24236,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estim org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: 
read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25269,11 +25099,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -25288,11 +25113,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -26156,11 +25976,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -26175,11 +25990,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -27043,11 +26853,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -27925,11 +27730,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=networ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29684,11 +29484,6 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -29703,11 +29498,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_m org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30571,11 +30361,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -30590,11 +30375,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_p 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31458,11 +31238,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -31477,11 +31252,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles, org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -33222,11 +32992,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34104,11 +33869,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -34986,11 +34746,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35005,11 +34760,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggr org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, 
type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35873,11 +35623,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -35892,11 +35637,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=colu org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -36760,11 +36500,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=drop org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: 
double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37642,11 +37377,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -37661,11 +37391,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=func org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -38529,11 +38254,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: 
java.lang.String} operations: - name: objectName parameters: [] @@ -38548,11 +38268,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=inde org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39416,11 +39131,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -39435,11 +39145,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keys org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40303,11 +40008,6 @@ 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -40322,11 +40022,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tabl org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41190,11 +40885,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -41209,11 +40899,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=trig 
org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42077,11 +41762,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42096,11 +41776,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=type org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42964,11 +42639,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - 
{access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -42983,11 +42653,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=view org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43851,11 +43516,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -43870,11 +43530,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=even org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: 
read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44738,11 +44393,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -44757,11 +44407,6 @@ org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sess org.apache.cassandra.metrics:type=ColumnFamily,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -45624,12 +45269,7 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesAnticompacted: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, 
name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] @@ -45643,12 +45283,7 @@ org.apache.cassandra.metrics:type=ColumnFamily,name=BytesFlushed: returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=ColumnFamily,name=BytesMutatedAnticompaction: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] @@ -57517,11 +57152,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Blo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -57536,11 +57166,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=Byt org.apache.cassandra.metrics:type=Table,keyspace=system,scope=IndexInfo,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} 
- - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58608,11 +58233,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -58627,11 +58247,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,n org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59699,11 +59314,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -59718,11 +59328,6 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=available_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60790,11 +60395,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -60809,11 +60409,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=batches,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61881,11 +61476,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -61900,11 +61490,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=built_views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62972,11 +62557,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -62991,11 +62571,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history org.apache.cassandra.metrics:type=Table,keyspace=system,scope=compaction_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: 
Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64063,11 +63638,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -64082,11 +63652,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=local,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -65154,11 +64719,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: 
MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -65173,11 +64733,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesFl org.apache.cassandra.metrics:type=Table,keyspace=system,scope=paxos,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66245,11 +65800,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -66264,11 +65814,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=B org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: 
- name: objectName parameters: [] @@ -67336,11 +66881,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -67355,11 +66895,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peer_events_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -68427,11 +67962,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BloomFi org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -68446,11 +67976,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesFl 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69518,11 +69043,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Bloo org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -69537,11 +69057,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=Byte org.apache.cassandra.metrics:type=Table,keyspace=system,scope=peers_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70609,11 +70124,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} 
- - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -70628,11 +70138,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statement org.apache.cassandra.metrics:type=Table,keyspace=system,scope=prepared_statements,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71700,11 +71205,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bloom org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -71719,11 +71219,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=Bytes org.apache.cassandra.metrics:type=Table,keyspace=system,scope=repairs,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: 
MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -72791,11 +72286,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -72810,11 +72300,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,nam org.apache.cassandra.metrics:type=Table,keyspace=system,scope=size_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -73882,11 +73367,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} 
operations: - name: objectName parameters: [] @@ -73901,11 +73381,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v org.apache.cassandra.metrics:type=Table,keyspace=system,scope=sstable_activity_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74973,11 +74448,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -74992,11 +74462,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,na org.apache.cassandra.metrics:type=Table,keyspace=system,scope=table_estimates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76064,11 +75529,6 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -76083,11 +75543,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77155,11 +76610,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -77174,11 +76624,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges 
org.apache.cassandra.metrics:type=Table,keyspace=system,scope=transferred_ranges_v2,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78246,11 +77691,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -78265,11 +77705,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_pro org.apache.cassandra.metrics:type=Table,keyspace=system,scope=view_builds_in_progress,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -79337,11 +78772,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesAnticompacted: 
attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -79356,11 +78786,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=network_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -80428,11 +79853,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=resource_role_permissons_index,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -81514,11 +80934,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: 
double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -81533,11 +80948,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members, org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_members,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82605,11 +82015,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -82624,11 +82029,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissi org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=role_permissions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - 
{access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -83696,11 +83096,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=Bl org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -83715,11 +83110,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=By org.apache.cassandra.metrics:type=Table,keyspace=system_auth,scope=roles,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -84787,11 +84177,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=parent_repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: 
objectName parameters: [] @@ -85873,11 +85258,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -85892,11 +85272,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=repair_history,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -86964,11 +86339,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_b org.apache.cassandra.metrics:type=Table,keyspace=system_distributed,scope=view_build_status,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88050,11 +87420,6 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -88069,11 +87434,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates, org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=aggregates,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89141,11 +88501,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -89160,11 +88515,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,nam 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90232,11 +89582,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -90251,11 +89596,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_col org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=dropped_columns,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91323,11 +90663,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesAnticompacted: attributes: - {access: 
read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -91342,11 +90677,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=functions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92414,11 +91744,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -92433,11 +91758,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,nam org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=indexes,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: 
FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93505,11 +92825,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -93524,11 +92839,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,n org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=keyspaces,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -94596,11 +93906,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - 
{access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -94615,11 +93920,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=tables,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -95687,11 +94987,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -95706,11 +95001,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,na org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=triggers,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96778,11 +96068,6 @@ 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -96797,11 +96082,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=types,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97869,11 +97149,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -97888,11 +97163,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name= 
org.apache.cassandra.metrics:type=Table,keyspace=system_schema,scope=views,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98960,11 +98230,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesAnticompacted: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -98979,11 +98244,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=events,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -100051,11 +99311,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesAnticompacted: attributes: - {access: read-only, name: 
Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -100070,11 +99325,6 @@ org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,na org.apache.cassandra.metrics:type=Table,keyspace=system_traces,scope=sessions,name=BytesMutatedAnticompaction: attributes: - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} operations: - name: objectName parameters: [] @@ -101141,12 +100391,7 @@ org.apache.cassandra.metrics:type=Table,name=BloomFilterOffHeapMemoryUsed: returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesAnticompacted: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] @@ -101160,12 +100405,7 @@ org.apache.cassandra.metrics:type=Table,name=BytesFlushed: returnType: javax.management.ObjectName org.apache.cassandra.metrics:type=Table,name=BytesMutatedAnticompaction: attributes: - - {access: read-only, name: Count, type: long} - - {access: read-only, name: FifteenMinuteRate, type: double} - - {access: 
read-only, name: FiveMinuteRate, type: double} - - {access: read-only, name: MeanRate, type: double} - - {access: read-only, name: OneMinuteRate, type: double} - - {access: read-only, name: RateUnit, type: java.lang.String} + - {access: read-only, name: Value, type: java.lang.Object} operations: - name: objectName parameters: [] From 3d59a7add8e4384cfd6bc8599dd9be35f6a6dd13 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Fri, 20 Dec 2024 21:54:44 -0800 Subject: [PATCH 106/257] gauge compatible meter --- .../metrics/CassandraMetricsRegistry.java | 52 +++++++++++++++++-- .../cassandra/metrics/TableMetrics.java | 15 ++++-- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index 11b36c606a3a..58540f903fde 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -305,9 +305,14 @@ public Counter counter(MetricName... name) } public Meter meter(MetricName... name) + { + return meter(false, name); + } + + public Meter meter(boolean gaugeCompatible, MetricName... 
name) { Meter meter = super.meter(name[0].getMetricName()); - Stream.of(name).forEach(n -> register(n, meter)); + Stream.of(name).forEach(n -> register(gaugeCompatible, n, meter)); return meter; } @@ -374,6 +379,11 @@ public static SnapshottingReservoir createReservoir(TimeUnit durationUnit) } public T register(MetricName name, T metric) + { + return register(false, name, metric); + } + + public T register(boolean gaugeCompatible, MetricName name, T metric) { if (metric instanceof MetricSet) throw new IllegalArgumentException("MetricSet registration using MetricName is not supported"); @@ -381,7 +391,7 @@ public T register(MetricName name, T metric) try { verifyUnknownMetric(name); - registerMBean(metric, name.getMBeanName(), MBeanWrapper.instance); + registerMBean(metric, name.getMBeanName(), MBeanWrapper.instance, gaugeCompatible); return super.register(name.getMetricName(), metric); } catch (IllegalArgumentException e) @@ -495,7 +505,7 @@ public interface MetricNameResolver @Nullable String resolve(String fullName); } - private void registerMBean(Metric metric, ObjectName name, MBeanWrapper mBeanServer) + public void registerMBean(Metric metric, ObjectName name, MBeanWrapper mBeanServer, boolean gaugeCompatible) { AbstractBean mbean; @@ -508,7 +518,18 @@ else if (metric instanceof Histogram) else if (metric instanceof Timer) mbean = new JmxTimer((Timer) metric, name, TimeUnit.SECONDS, DEFAULT_TIMER_UNIT); else if (metric instanceof Metered) - mbean = new JmxMeter((Metered) metric, name, TimeUnit.SECONDS); + { + // If a gauge compatible meter is requested, create a special implementation which + // also yields a 'Value' attribute for backwards compatibility. 
+ if (gaugeCompatible) + { + mbean = new JmxMeterGaugeCompatible((Metered) metric, name, TimeUnit.SECONDS); + } + else + { + mbean = new JmxMeter((Metered) metric, name, TimeUnit.SECONDS); + } + } else throw new IllegalArgumentException("Unknown metric type: " + metric.getClass()); @@ -820,6 +841,29 @@ private String calculateRateUnit(TimeUnit unit) } } + public interface JmxMeterGaugeCompatibleMBean extends JmxMeterMBean, JmxGaugeMBean {} + + /** + * An implementation of {@link JmxMeter} that is compatible with {@link JmxGaugeMBean} in that it also + * implements {@link JmxGaugeMBean}. This is useful for metrics that were migrated from {@link JmxGauge} + * to {@link JmxMeter} like {@link TableMetrics#bytesAnticompacted} and + * {@link TableMetrics#bytesMutatedAnticompaction}. + */ + private static class JmxMeterGaugeCompatible extends JmxMeter implements JmxMeterGaugeCompatibleMBean + { + + private JmxMeterGaugeCompatible(Metered metric, ObjectName objectName, TimeUnit rateUnit) + { + super(metric, objectName, rateUnit); + } + + @Override + public Object getValue() + { + return getCount(); + } + } + /** * Exports a timer as a JMX MBean, check corresponding {@link org.apache.cassandra.db.virtual.model.TimerMetricRow} * for the same functionality for virtual tables. 
diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 29a494067d96..2622b76daa4a 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -837,8 +837,8 @@ public Long getValue() bytesValidated = createTableHistogram("BytesValidated", cfs.keyspace.metric.bytesValidated, false); partitionsValidated = createTableHistogram("PartitionsValidated", cfs.keyspace.metric.partitionsValidated, false); - bytesAnticompacted = createTableMeter("BytesAnticompacted", cfs.keyspace.metric.bytesAnticompacted); - bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction", cfs.keyspace.metric.bytesMutatedAnticompaction); + bytesAnticompacted = createTableMeter("BytesAnticompacted", cfs.keyspace.metric.bytesAnticompacted, true); + bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction", cfs.keyspace.metric.bytesMutatedAnticompaction, true); bytesPreviewed = createTableMeter("BytesPreviewed", cfs.keyspace.metric.bytesPreviewed); tokenRangesPreviewedDesynchronized = createTableMeter("TokenRangesPreviewedDesynchronized", cfs.keyspace.metric.tokenRangesPreviewedDesynchronized); bytesPreviewedDesynchronized = createTableMeter("BytesPreviewedDesynchronized", cfs.keyspace.metric.bytesPreviewedDesynchronized); @@ -1112,16 +1112,21 @@ protected SnapshottingTimer createTableTimer(String name) protected TableMeter createTableMeter(String name, Meter keyspaceMeter) { - return createTableMeter(name, name, keyspaceMeter); + return createTableMeter(name, keyspaceMeter, false); } - protected TableMeter createTableMeter(String name, String alias, Meter keyspaceMeter) + protected TableMeter createTableMeter(String name, Meter keyspaceMeter, boolean globalMeterGaugeCompatible) + { + return createTableMeter(name, name, keyspaceMeter, globalMeterGaugeCompatible); + } + + protected TableMeter createTableMeter(String name, 
String alias, Meter keyspaceMeter, boolean globalMeterGaugeCompatible) { Meter meter = Metrics.meter(factory.createMetricName(name), aliasFactory.createMetricName(alias)); register(name, alias, meter); return new TableMeter(meter, keyspaceMeter, - Metrics.meter(GLOBAL_FACTORY.createMetricName(name), + Metrics.meter(globalMeterGaugeCompatible, GLOBAL_FACTORY.createMetricName(name), GLOBAL_ALIAS_FACTORY.createMetricName(alias))); } From 54fa2474d476ce0e0237bb646015641765b59ba5 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 20 Dec 2024 08:05:01 -0800 Subject: [PATCH 107/257] Implement minimum repair task duration setting for auto-repair scheduler --- .../repair/autorepair/AutoRepair.java | 18 +++++-- .../repair/autorepair/AutoRepairConfig.java | 14 ++++++ .../cassandra/service/AutoRepairService.java | 7 +++ .../service/AutoRepairServiceMBean.java | 2 + .../org/apache/cassandra/tools/NodeProbe.java | 5 ++ .../tools/nodetool/GetAutoRepairConfig.java | 1 + .../tools/nodetool/SetAutoRepairConfig.java | 6 ++- .../autorepair/AutoRepairConfigTest.java | 10 +++- .../AutoRepairParameterizedTest.java | 47 +++++++++++++++++++ .../nodetool/SetAutoRepairConfigTest.java | 11 ++++- 10 files changed, 115 insertions(+), 6 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index d6d58746f45f..50d8e9e5dee8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -319,8 +319,8 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim while (retryCount <= config.getRepairMaxRetries()) { RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - Lists.newArrayList(curRepairAssignment.getTableNames()), - ranges, primaryRangeOnly); + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); 
repairState.resetWaitCondition(); f = repairRunnableExecutors.get(repairType).submit(task); try @@ -489,6 +489,18 @@ public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) return repairStates.get(repairType); } + private void soakAfterRepair(long startTimeMilis, long minDurationMilis) + { + long currentTime = timeFunc.get(); + long timeElapsed = currentTime - startTimeMilis; + if (timeElapsed < minDurationMilis) + { + long timeToSoak = minDurationMilis - timeElapsed; + logger.info("Soaking for {} ms after repair", timeToSoak); + sleepFunc.accept(timeToSoak, TimeUnit.MILLISECONDS); + } + } + static class CollectedRepairStats { int failedTokenRanges = 0; @@ -496,4 +508,4 @@ static class CollectedRepairStats int skippedTokenRanges = 0; int skippedTables = 0; } -} +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 8eaef0b6ecc5..c3f39c1878bc 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -52,6 +52,10 @@ public class AutoRepairConfig implements Serializable public volatile Integer repair_max_retries = 3; // the backoff time in seconds for retrying a repair session. public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); + // the minimum duration for the execution of a single repair task (i.e.: RepairRunnable). + // This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of + // repair tasks in a short period of time. 
+ public volatile DurationSpec.LongSecondsBound repair_task_min_duration = new DurationSpec.LongSecondsBound("5s"); // global_settings overides Options.defaultOptions for all repair types public volatile Options global_settings; @@ -160,6 +164,16 @@ public void setRepairRetryBackoff(String interval) repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); } + public DurationSpec.LongSecondsBound getRepairTaskMinDuration() + { + return repair_task_min_duration; + } + + public void setRepairTaskMinDuration(String duration) + { + repair_task_min_duration = new DurationSpec.LongSecondsBound(duration); + } + public boolean isAutoRepairEnabled(RepairType repairType) { return enabled && applyOverrides(repairType, opt -> opt.enabled); diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index d8fd68253596..5e8bbee6eb90 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; @@ -142,6 +143,12 @@ public void setAutoRepairRetryBackoff(String interval) config.setRepairRetryBackoff(interval); } + @Override + public void setAutoRepairMinRepairTaskDuration(String duration) + { + config.setRepairTaskMinDuration(duration); + } + @Override public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) { diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 121c9a480303..cac2680c3f01 100644 --- 
a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -50,6 +50,8 @@ public interface AutoRepairServiceMBean public void setAutoRepairRetryBackoff(String interval); + public void setAutoRepairMinRepairTaskDuration(String duration); + public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 5417c127e5e0..9a945f406d18 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2622,6 +2622,11 @@ public void setAutoRepairRetryBackoff(String interval) autoRepairProxy.setAutoRepairRetryBackoff(interval); } + public void setAutoRepairMinRepairTaskDuration(String duration) + { + autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); + } + public void setRepairSSTableCountHigherThreshold(AutoRepairConfig.RepairType repairType, int ssTableHigherThreshold) { autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 48f8d54de3f6..681b8533824b 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -49,6 +49,7 @@ public void execute(NodeProbe probe) sb.append("\n\tTTL for repair history for dead nodes: " + config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); sb.append("\n\tmax retries for repair: " + config.getRepairMaxRetries()); sb.append("\n\tretry backoff: " + config.getRepairRetryBackoff()); + sb.append("\n\tmin repair job duration: " + 
config.getRepairTaskMinDuration().toSeconds() + " seconds"); for (RepairType repairType : RepairType.values()) { sb.append(formatRepairTypeConfig(probe, repairType, config)); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 2929c944442a..75cdea6cd1c0 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -46,7 +46,8 @@ public class SetAutoRepairConfig extends NodeToolCmd "[start_scheduler|number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + - "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries|repair_retry_backoff|repair_session_timeout]", + "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries" + + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration]", required = true) protected List args = new ArrayList<>(); @@ -88,6 +89,9 @@ public void execute(NodeProbe probe) case "repair_retry_backoff": probe.setAutoRepairRetryBackoff(paramVal); return; + case "min_repair_task_duration": + probe.setAutoRepairMinRepairTaskDuration(paramVal); + return; default: // proceed to options that require --repair-type option break; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 08ae9631a192..79b371fca347 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -90,6 +90,15 @@ public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsEnabled() 
assertTrue(config.isAutoRepairEnabled(repairType)); } + @Test + public void testRepairMinDuration() + { + config = new AutoRepairConfig(false); + + config.setRepairTaskMinDuration("3s"); + assertEquals(3L, config.getRepairTaskMinDuration().toSeconds()); + } + @Test public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() { @@ -98,7 +107,6 @@ public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() assertFalse(config.isAutoRepairEnabled(repairType)); } - @Test public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 4e12fc497210..b0da539f85a8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -178,6 +178,7 @@ public void setup() timeFuncCalls = 0; AutoRepair.timeFunc = System::currentTimeMillis; + AutoRepair.sleepFunc = (Long startTime, TimeUnit unit) -> {}; resetCounters(); resetConfig(); @@ -220,6 +221,7 @@ private void resetConfig() config.global_settings = defaultConfig.global_settings; config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; config.setRepairSubRangeNum(repairType, 1); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); } private void executeCQL() @@ -751,4 +753,49 @@ public void testRepairThrowsForIRWithCDCReplay() AutoRepair.instance.repair(repairType); } } + + + @Test + public void testSoakAfterImmediateRepair() + { + when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.isSuccess()).thenReturn(true); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.repair_task_min_duration = 
new DurationSpec.LongSecondsBound("1s"); + AtomicInteger sleepCalls = new AtomicInteger(); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + sleepCalls.getAndIncrement(); + assertEquals(TimeUnit.MILLISECONDS, unit); + assertTrue(config.getRepairTaskMinDuration().toMilliseconds() >= duration); + }; + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + + AutoRepair.instance.repair(repairType); + + assertEquals(expectedTablesGoingThroughRepair, sleepCalls.get()); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + } + + @Test + public void testNoSoakAfterRepair() + { + when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); + when(autoRepairState.isSuccess()).thenReturn(true); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + fail("Should not sleep after repair"); + }; + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + + AutoRepair.instance.repair(repairType); + + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + } } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 6e2d79def026..e12af34af8d8 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ 
b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -105,7 +105,6 @@ public void testRepairMaxRetries() verify(probe, times(1)).setAutoRepairMaxRetriesCount(2); } - @Test public void testRetryBackoffInSec() { @@ -131,6 +130,16 @@ public void testStartScheduler() verify(probe, times(1)).startScheduler(); } + + @Test + public void testMinRepairDuration() + { + cmd.args = ImmutableList.of("min_repair_task_duration", "4s"); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairMinRepairTaskDuration("4s"); + } } @RunWith(Parameterized.class) From 8d8ed47947d544d3ccc6289d9548af8a84d9f878 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 20 Dec 2024 08:09:45 -0800 Subject: [PATCH 108/257] Cleanup --- .../cassandra/repair/autorepair/AutoRepairParameterizedTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index b0da539f85a8..427fdfeae4a9 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -105,7 +105,6 @@ public class AutoRepairParameterizedTest extends CQLTester // Expected number of tables that should be repaired. 
private static int expectedTablesGoingThroughRepair; - @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; From 18171b3e29dc054ae646ec44e91b9b0de2411385 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Fri, 20 Dec 2024 08:17:22 -0800 Subject: [PATCH 109/257] Cleanup --- src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java | 2 ++ .../repair/autorepair/AutoRepairParameterizedTest.java | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 50d8e9e5dee8..df0db2854355 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -325,7 +325,9 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim f = repairRunnableExecutors.get(repairType).submit(task); try { + long jobStartTime = timeFunc.get(); repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + soakAfterRepair(jobStartTime, config.getRepairTaskMinDuration().toMilliseconds()); } catch (InterruptedException e) { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 427fdfeae4a9..7cf05a70a890 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -753,7 +753,6 @@ public void testRepairThrowsForIRWithCDCReplay() } } - @Test public void testSoakAfterImmediateRepair() { From 7915e5522920d03ecc76670769a4e36a64076894 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Sat, 21 Dec 2024 08:29:55 -0800 Subject: [PATCH 110/257] Address comment --- .../repair/autorepair/AutoRepairParameterizedTest.java | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 7cf05a70a890..2d787bd88b72 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -759,19 +759,20 @@ public void testSoakAfterImmediateRepair() when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); when(autoRepairState.isSuccess()).thenReturn(true); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.repair_task_min_duration = new DurationSpec.LongSecondsBound("1s"); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("10s"); AtomicInteger sleepCalls = new AtomicInteger(); AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.MILLISECONDS, unit); assertTrue(config.getRepairTaskMinDuration().toMilliseconds() >= duration); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); }; config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.put(repairType, autoRepairState); AutoRepair.instance.repair(repairType); - assertEquals(expectedTablesGoingThroughRepair, sleepCalls.get()); + assertEquals(1, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); From b60f6ce1da2d70eed37eb973a2389957849ad46e Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sat, 21 Dec 2024 10:22:33 -0800 Subject: [PATCH 111/257] Remove unused imports --- src/java/org/apache/cassandra/service/AutoRepairService.java | 1 - 1 file changed, 1 
deletion(-) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 5e8bbee6eb90..167e4507a7c1 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; From 97f764af0ddb89e4dfec0ed873e07f7dc254c79a Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sat, 21 Dec 2024 10:37:33 -0800 Subject: [PATCH 112/257] formatting --- src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index df0db2854355..1bd49e1b214a 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -510,4 +510,4 @@ static class CollectedRepairStats int skippedTokenRanges = 0; int skippedTables = 0; } -} \ No newline at end of file +} From c5b8768ebec299f74b40fb8055e35238a4b61c44 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:56:35 -0600 Subject: [PATCH 113/257] Make RepairTokenRangeSplitter the default implementation Summary of impacting changes: * RepairTokenRangeSplitter is now the default in favor of FixedSplitTokenRangeSplitter. * number_of_subranges moved from a repair_type_override/global config to a property of FixedSplitTokenRangeSplitter. * [get|set]autorepairconfig usability changes Detailed breakdown of changes: 0. 
IAutoRepairTokenRangeSplitter changes: * Move RepairType from a parameter in getRepairAssignments to a parameter in the constructor for implementations. This was done because the RepairType is always the same for a splitter instance. * Add setParameter and getParameters methods which are used by setautorepairconfig and getautorepairconfig to dynamically update splitter configuration. 1. [get|set]autorepairconfig changes: * getautorepairconfig output now shows property names instead of human readable names (e.g. repair_check_interval instead of repair eligibility check interval). This was done to make it more intuitive to know what properties to use for setautorepairconfig. * getautorepairconfig and setautorepairconfig now support viewing and changing splitter properties, e.g.: setautorepairconfig token_range_splitter.max_bytes_per_schedule 500GiB -t full 2. RepairTokenRangeSplitter changes: * Renames RepairRangeSplitter to RepairTokenRangeSplitter and makes it the default implementation. * Establishes sensible defaults for each repair type. * Improve javadocs detailing the primary goal of the splitter, its configuration and its defaults and the justifications for using them. * Rename variables to be consistent with their setting names. 3. FixedSplitTokenRangeSplitter changes: * Renames DefaultAutoRepairTokenSplitter to FixedSplitTokenRangeSplitter as it is no longer the default. * Move number_of_subranges from a global config to a property for this splitter. 4. RepairAssignmentIterator * Refactored common code from both splitter implementations into RepairAssignmentIterator with the aim to reduce the amount of boilerplate that custom splitter implementations need to implement. 5. Test changes * Fix AutoRepairParameterizedTest to use fixed splitter so we get a deterministic repair plan. * Allow splitter to be changed programmatically, only expect it to be used for tests.
* Rename CassandraSreamReceiverTest and fix it Whether streaming cdc/mvs into commitlogs was previously dependent on system properties; update the test to account for the new yaml properties. * Fix dtest after CASSANDRA-20160 The introduction of repair_task_min_duration causes repairs to take cumulatively longer for a node than 2 minutes. To resolve this, set that to 0s, and also enable repair_by_keyspace and set subranges to 1 to reduce the overall number of repairs. Patch by Andy Tolbert; reviewed by ___ for CASSANDRA-20179 --- .../db/streaming/CassandraStreamReceiver.java | 2 +- .../repair/autorepair/AutoRepair.java | 53 +- .../repair/autorepair/AutoRepairConfig.java | 95 +++- .../repair/autorepair/AutoRepairUtils.java | 2 +- ...java => FixedSplitTokenRangeSplitter.java} | 109 ++-- .../IAutoRepairTokenRangeSplitter.java | 46 +- .../autorepair/PrioritizedRepairPlan.java | 2 +- .../autorepair/RepairAssignmentIterator.java | 84 +++ ...ter.java => RepairTokenRangeSplitter.java} | 513 ++++++++++++++---- .../cassandra/service/AutoRepairService.java | 19 +- .../service/AutoRepairServiceMBean.java | 7 +- .../org/apache/cassandra/tools/NodeProbe.java | 15 +- .../tools/nodetool/GetAutoRepairConfig.java | 71 ++- .../tools/nodetool/SetAutoRepairConfig.java | 23 +- .../apache/cassandra/utils/FBUtilities.java | 28 - .../test/repair/AutoRepairSchedulerTest.java | 8 +- .../config/DatabaseDescriptorRefTest.java | 10 +- ....java => CassandraStreamReceiverTest.java} | 45 +- .../autorepair/AutoRepairConfigTest.java | 25 +- ...DefaultTokenSplitterParameterizedTest.java | 13 +- .../AutoRepairParameterizedTest.java | 11 +- ...java => RepairTokenRangeSplitterTest.java} | 74 +-- .../service/AutoRepairServiceSetterTest.java | 1 - .../nodetool/SetAutoRepairConfigTest.java | 21 +- 24 files changed, 893 insertions(+), 384 deletions(-) rename src/java/org/apache/cassandra/repair/autorepair/{DefaultAutoRepairTokenSplitter.java => FixedSplitTokenRangeSplitter.java} (55%) create mode 100644 
src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java rename src/java/org/apache/cassandra/repair/autorepair/{RepairRangeSplitter.java => RepairTokenRangeSplitter.java} (57%) rename test/unit/org/apache/cassandra/db/streaming/{CassandraSreamReceiverTest.java => CassandraStreamReceiverTest.java} (74%) rename test/unit/org/apache/cassandra/repair/autorepair/{RepairRangeSplitterTest.java => RepairTokenRangeSplitterTest.java} (78%) diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 3d63ca016d85..2c31b175679d 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -193,7 +193,7 @@ private boolean hasCDC(ColumnFamilyStore cfs) return cfs.metadata().params.cdc; } - // returns true iif it is a cdc table and cdc on repair is enabled. + // returns true if it is a cdc table and cdc on repair is enabled. 
private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) { return DatabaseDescriptor.isCDCOnRepairEnabled() && hasCDC(cfs); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 1bd49e1b214a..52324f917106 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -19,6 +19,7 @@ package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; +import java.util.Collections; import java.util.EnumMap; import java.util.HashSet; import java.util.Iterator; @@ -39,6 +40,7 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; @@ -91,8 +93,6 @@ public class AutoRepair @VisibleForTesting protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; - protected final Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); - private boolean isSetupDone = false; @VisibleForTesting @@ -107,7 +107,43 @@ protected AutoRepair() repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); - tokenRangeSplitters.put(repairType, FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType))); + } + } + + @VisibleForTesting + static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, 
ParameterizedClass parameterizedClass) throws ConfigurationException + { + try + { + Class tokenRangeSplitterClass; + final String className; + if (parameterizedClass.class_name != null && !parameterizedClass.class_name.isEmpty()) + { + className = parameterizedClass.class_name.contains(".") ? + parameterizedClass.class_name : + "org.apache.cassandra.repair.autorepair." + parameterizedClass.class_name; + tokenRangeSplitterClass = FBUtilities.classForName(className, "token_range_splitter"); + } + else + { + // If token_range_splitter.class_name is not defined, just use default, this is for convenience. + tokenRangeSplitterClass = AutoRepairConfig.DEFAULT_SPLITTER; + } + try + { + Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); + // first attempt to initialize with RepairType and Map arguments. + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(AutoRepairConfig.RepairType.class, Map.class).newInstance(repairType, parameters); + } + catch (NoSuchMethodException nsme) + { + // fall back on no argument constructor. + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); + } + } + catch (Exception ex) + { + throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter", ex); } } @@ -149,15 +185,6 @@ public void repairAsync(AutoRepairConfig.RepairType repairType) repairExecutors.get(repairType).submit(() -> repair(repairType)); } - /** - * @return The priority of the given table if defined, otherwise 0. - */ - private int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) - { - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); - return cfs != null ? cfs.metadata().params.autoRepair.priority() : 0; - } - // repair runs a repair session of the given type synchronously. 
public void repair(AutoRepairConfig.RepairType repairType) { @@ -228,7 +255,7 @@ public void repair(AutoRepairConfig.RepairType repairType) List repairPlans = PrioritizedRepairPlan.build(keyspacesAndTablesToRepair, repairType, shuffleFunc); // calculate the repair assignments for each priority:keyspace. - Iterator repairAssignmentsIterator = tokenRangeSplitters.get(repairType).getRepairAssignments(repairType, primaryRangeOnly, repairPlans); + Iterator repairAssignmentsIterator = config.getTokenRangeSplitterInstance(repairType).getRepairAssignments(primaryRangeOnly, repairPlans); while (repairAssignmentsIterator.hasNext()) { diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index c3f39c1878bc..2e1d5f0a7276 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -20,7 +20,9 @@ import java.io.Serializable; import java.util.Collections; +import java.util.EnumMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.function.Function; @@ -33,6 +35,8 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.utils.LocalizeString; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.utils.FBUtilities; public class AutoRepairConfig implements Serializable { @@ -60,6 +64,11 @@ public class AutoRepairConfig implements Serializable // global_settings overides Options.defaultOptions for all repair types public volatile Options global_settings; + public static final Class DEFAULT_SPLITTER = RepairTokenRangeSplitter.class; + + // make transient so it gets constructed in the implementation.
+ private final transient Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); + public enum RepairType implements Serializable { FULL, @@ -204,16 +213,6 @@ public void setRepairThreads(RepairType repairType, int repairThreads) getOptions(repairType).number_of_repair_threads = repairThreads; } - public int getRepairSubRangeNum(RepairType repairType) - { - return applyOverrides(repairType, opt -> opt.number_of_subranges); - } - - public void setRepairSubRangeNum(RepairType repairType, int repairSubRanges) - { - getOptions(repairType).number_of_subranges = repairSubRanges; - } - public DurationSpec.IntSecondsBound getRepairMinInterval(RepairType repairType) { return applyOverrides(repairType, opt -> opt.min_repair_interval); @@ -309,6 +308,26 @@ public ParameterizedClass getTokenRangeSplitter(RepairType repairType) return applyOverrides(repairType, opt -> opt.token_range_splitter); } + /** + * Set a new token range splitter, this is not meant to be used other than for testing. 
+ */ + public void setTokenRangeSplitter(RepairType repairType, ParameterizedClass tokenRangeSplitter) + { + getOptions(repairType).token_range_splitter = tokenRangeSplitter; + tokenRangeSplitters.remove(repairType); + } + + public IAutoRepairTokenRangeSplitter getTokenRangeSplitterInstance(RepairType repairType) + { + IAutoRepairTokenRangeSplitter splitter = tokenRangeSplitters.get(repairType); + if (splitter == null) + { + splitter = newAutoRepairTokenRangeSplitter(repairType, getTokenRangeSplitter(repairType)); + tokenRangeSplitters.put(repairType, splitter); + } + return splitter; + } + public void setInitialSchedulerDelay(RepairType repairType, String initialSchedulerDelay) { getOptions(repairType).initial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); @@ -329,6 +348,43 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT getOptions(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); } + @VisibleForTesting + static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException + { + try + { + Class tokenRangeSplitterClass; + final String className; + if (parameterizedClass.class_name != null && !parameterizedClass.class_name.isEmpty()) + { + className = parameterizedClass.class_name.contains(".") ? + parameterizedClass.class_name : + "org.apache.cassandra.repair.autorepair." + parameterizedClass.class_name; + tokenRangeSplitterClass = FBUtilities.classForName(className, "token_range_splitter"); + } + else + { + // If token_range_splitter.class_name is not defined, just use default, this is for convenience. + tokenRangeSplitterClass = AutoRepairConfig.DEFAULT_SPLITTER; + } + try + { + Map parameters = parameterizedClass.parameters != null ? 
parameterizedClass.parameters : Collections.emptyMap(); + // first attempt to initialize with RepairType and Map arguments. + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(AutoRepairConfig.RepairType.class, Map.class).newInstance(repairType, parameters); + } + catch (NoSuchMethodException nsme) + { + // fall back on no argument constructor. + return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); + } + } + catch (Exception ex) + { + throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter", ex); + } + } + // Options configures auto-repair behavior for a given repair type. // All fields can be modified dynamically. public static class Options implements Serializable @@ -348,7 +404,6 @@ protected static Options getDefaultOptions() opts.enabled = false; opts.repair_by_keyspace = false; - opts.number_of_subranges = 16; opts.number_of_repair_threads = 1; opts.parallel_repair_count = 3; opts.parallel_repair_percentage = 3; @@ -359,7 +414,7 @@ protected static Options getDefaultOptions() opts.force_repair_new_node = false; opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.mv_repair_enabled = false; - opts.token_range_splitter = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); + opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours @@ -370,13 +425,6 @@ protected static Options getDefaultOptions() public volatile Boolean enabled; // auto repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. 
public volatile Boolean repair_by_keyspace; - // the number of subranges to split each to-be-repaired token range into, - // the higher this number, the smaller the repair sessions will be - // How many subranges to divide one range into? The default is 1. - // If you are using v-node, say 256, then the repair will always go one v-node range at a time, this parameter, additionally, will let us further subdivide a given v-node range into sub-ranges. - // With the value “1” and v-nodes of 256, a given table on a node will undergo the repair 256 times. But with a value “2,” the same table on a node will undergo a repair 512 times because every v-node range will be further divided by two. - // If you do not use v-nodes or the number of v-nodes is pretty small, say 8, setting this value to a higher number, say 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. - public volatile Integer number_of_subranges; // the number of repair threads to run for a given invoked Repair Job. // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. // This is similar to -j for repair options for the nodetool repair command. @@ -423,8 +471,12 @@ protected static Options getDefaultOptions() // the default is 'true'. // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. public volatile Boolean mv_repair_enabled; - // the default is DefaultAutoRepairTokenSplitter. The class should implement IAutoRepairTokenRangeSplitter. - // The default implementation splits the tokens based on the token ranges owned by this node divided by the number of 'number_of_subranges' + /** + * Splitter implementation to use for generating repair assignments. + *
<p>
+ * The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} + * and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) + */ public volatile ParameterizedClass token_range_splitter; // the minimum delay after a node starts before the scheduler starts running repair public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; @@ -437,7 +489,6 @@ public String toString() return "Options{" + "enabled=" + enabled + ", repair_by_keyspace=" + repair_by_keyspace + - ", number_of_subranges=" + number_of_subranges + ", number_of_repair_threads=" + number_of_repair_threads + ", parallel_repair_count=" + parallel_repair_count + ", parallel_repair_percentage=" + parallel_repair_percentage + diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 635abea37177..e88c4fb2aa99 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -434,7 +434,7 @@ protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Co } else { - logger.info("Node is not present in Gossipe cache node {}, node data center {}", node, nodeDC); + logger.info("Node is not present in Gossip cache node {}, node data center {}", node, nodeDC); } } return hostIdsInCurrentRing; diff --git a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java similarity index 55% rename from src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java rename to src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 7bb1042b41c8..9fb39199d745 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/DefaultAutoRepairTokenSplitter.java +++ 
b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -23,7 +23,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; -import java.util.NoSuchElementException; +import java.util.Map; import org.apache.cassandra.service.AutoRepairService; @@ -33,68 +33,49 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; -public class DefaultAutoRepairTokenSplitter implements IAutoRepairTokenRangeSplitter +public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { - @Override - public Iterator getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) + /** + * The number of subranges to split each to-be-repaired token range into, + * the higher this number, the smaller the repair sessions will be + * How many subranges to divide one range into? The default is 1. + * If you are using v-node, say 256, then the repair will always go one v-node range at a time, this parameter, additionally, will let us further subdivide a given v-node range into sub-ranges. + * With the value “1” and v-nodes of 256, a given table on a node will undergo the repair 256 times. But with a value “2,” the same table on a node will undergo a repair 512 times because every v-node range will be further divided by two. + * If you do not use v-nodes or the number of v-nodes is pretty small, say 8, setting this value to a higher number, say 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. 
+ */ + static final String NUMBER_OF_SUBRANGES = "number_of_subranges"; + + private final AutoRepairConfig.RepairType repairType; + private int numberOfSubranges; + + public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map parameters) { - return new RepairAssignmentIterator(repairType, primaryRangeOnly, repairPlans); - } - - private class RepairAssignmentIterator implements Iterator - { - - private final AutoRepairConfig.RepairType repairType; - private final boolean primaryRangeOnly; + this.repairType = repairType; - private final Iterator repairPlanIterator; - - private Iterator currentIterator = null; - private PrioritizedRepairPlan currentPlan = null; - - RepairAssignmentIterator(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) + if (parameters.containsKey(NUMBER_OF_SUBRANGES)) { - this.repairType = repairType; - this.primaryRangeOnly = primaryRangeOnly; - this.repairPlanIterator = repairPlans.iterator(); + numberOfSubranges = Integer.parseInt(parameters.get(NUMBER_OF_SUBRANGES)); } - - private synchronized Iterator currentIterator() + else { - if (currentIterator == null || !currentIterator.hasNext()) - { - // Advance the repair plan iterator if the current repair plan is exhausted, but only - // if there are more repair plans. 
- if (repairPlanIterator.hasNext()) - { - currentPlan = repairPlanIterator.next(); - currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); - } - } - - return currentIterator; + numberOfSubranges = 1; } + } - @Override - public boolean hasNext() - { - return currentIterator().hasNext(); - } + @Override + public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) + { + return new RepairAssignmentIterator(repairPlans) { - @Override - public KeyspaceRepairAssignments next() - { - if (!currentIterator.hasNext()) + @Override + KeyspaceRepairAssignments nextInternal(int priority, KeyspaceRepairPlan repairPlan) { - throw new NoSuchElementException("No remaining repair plans"); + return getRepairAssignmentsForKeyspace(primaryRangeOnly, priority, repairPlan); } - - final KeyspaceRepairPlan repairPlan = currentIterator().next(); - return getRepairAssignmentsForKeyspace(repairType, primaryRangeOnly, currentPlan.getPriority(), repairPlan); - } + }; } - private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, int priority, KeyspaceRepairPlan repairPlan) + private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primaryRangeOnly, int priority, KeyspaceRepairPlan repairPlan) { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); List repairAssignments = new ArrayList<>(); @@ -107,7 +88,6 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(AutoRepairConf // if we need to repair non-primary token ranges, then change the tokens accrodingly tokens = StorageService.instance.getLocalReplicas(keyspaceName).onlyFull().ranges(); } - int numberOfSubranges = config.getRepairSubRangeNum(repairType); boolean byKeyspace = config.getRepairByKeyspace(repairType); // collect all token ranges. 
@@ -139,4 +119,31 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(AutoRepairConf return new KeyspaceRepairAssignments(priority, keyspaceName, repairAssignments); } + + @Override + public void setParameter(String key, String value) + { + if (!key.equals(NUMBER_OF_SUBRANGES)) + { + throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be " + NUMBER_OF_SUBRANGES); + } + setNumberOfSubranges(Integer.parseInt(value)); + } + + @Override + public Map getParameters() + { + return Map.of(NUMBER_OF_SUBRANGES, Integer.toString(getNumberOfSubranges())); + } + + public void setNumberOfSubranges(int numberOfSubranges) + { + this.numberOfSubranges = numberOfSubranges; + } + + public int getNumberOfSubranges() + { + return this.numberOfSubranges; + } + } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index b51fb19ab460..86bac32c96bb 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -17,19 +17,55 @@ */ package org.apache.cassandra.repair.autorepair; +import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; +import org.apache.cassandra.config.ParameterizedClass; + +/** + * Interface that defines how to generate {@link KeyspaceRepairAssignments}. + *
<p>
+ * The default is {@link RepairTokenRangeSplitter} which aims to provide sensible defaults for all repair types. + *
<p>
+ * Custom implementation classes should provide a constructor accepting + * ({@link AutoRepairConfig.RepairType}, {@link java.util.Map}) with the {@link java.util.Map} parameter accepting + * custom configuration for your splitter. If such a constructor does not exist, + * {@link AutoRepair#newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType, ParameterizedClass)} + * will fall back on invoking a default zero argument constructor. + */ public interface IAutoRepairTokenRangeSplitter { - /** * Split the token range you wish to repair into multiple assignments. - * The autorepair framework will repair the list of returned subrange in a sequence. - * @param repairType The type of repair being executed + * The autorepair framework will repair the assignments from the returned iterator in the sequence they are + * provided. * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. * @param repairPlans A list of ordered prioritized repair plans to generate assignments for in order. - * @return Iterator of repair assignments, with each element representing a grouping of repair assignments for a given keyspace. + * @return iterator of repair assignments, with each element representing a grouping of repair assignments for a given keyspace. + * The iterator is traversed lazily, one {@link KeyspaceRepairAssignments} at a time, with the intent to try to get the + * most up-to-date representation of your data (e.g. how much data exists and is unrepaired at a given time). + */ + Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans); + + /** + * Update a configuration parameter. This is meant to be used by nodetool setautorepairconfig to + * update configuration dynamically. + * @param key parameter to update + * @param value The value to set.
+ */ + default void setParameter(String key, String value) + { + throw new IllegalArgumentException(this.getClass().getName() + " does not support custom configuration"); + } + + /** + * @return custom configuration. This is meant to be used by nodetool getautorepairconfig for + * retrieving the splitter configuration. */ - Iterator getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans); + default Map getParameters() + { + return Collections.emptyMap(); + } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java index 857fbe19b406..fbedb71b7c3c 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java +++ b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java @@ -33,7 +33,7 @@ /** * Encapsulates a devised plan to repair tables, grouped by their keyspace and a given priority. This is used * by {@link AutoRepair} to pass in an organized plan to - * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(AutoRepairConfig.RepairType, boolean, List)} which + * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(boolean, List)} which * can iterate over this plan in order to generate {@link RepairAssignment}s. */ public class PrioritizedRepairPlan diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java new file mode 100644 index 000000000000..5e6f8b61c3b8 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Convenience {@link Iterator} implementation for to assist implementations of + * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(boolean, List)} by passing {@link KeyspaceRepairPlan} + * to a custom {@link #nextInternal(int, KeyspaceRepairPlan)} in priority order. + */ +public abstract class RepairAssignmentIterator implements Iterator +{ + private final Iterator repairPlanIterator; + + private Iterator currentIterator = null; + private PrioritizedRepairPlan currentPlan = null; + + RepairAssignmentIterator(List repairPlans) + { + this.repairPlanIterator = repairPlans.iterator(); + } + + private synchronized Iterator currentIterator() + { + if (currentIterator == null || !currentIterator.hasNext()) + { + // Advance the repair plan iterator if the current repair plan is exhausted, but only + // if there are more repair plans. 
+ if (repairPlanIterator.hasNext()) + { + currentPlan = repairPlanIterator.next(); + currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); + } + } + + return currentIterator; + } + + @Override + public boolean hasNext() + { + return currentIterator().hasNext(); + } + + @Override + public KeyspaceRepairAssignments next() + { + if (!currentIterator.hasNext()) + { + throw new NoSuchElementException("No remaining repair plans"); + } + + final KeyspaceRepairPlan repairPlan = currentIterator().next(); + return nextInternal(currentPlan.getPriority(), repairPlan); + } + + /** + * Invoked by {@link #next()} with the next {@link KeyspaceRepairPlan} for the given priority. + * @param priority current priority being processed. + * @param repairPlan the next keyspace repair plan to process + * @return assignments for the given keyspace at this priority. Should never return null, if one desires to + * short-circuit the iterator, override {@link #hasNext()}. + */ + abstract KeyspaceRepairAssignments nextInternal(int priority, KeyspaceRepairPlan repairPlan); +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java similarity index 57% rename from src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java rename to src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index fab0f07fb195..e724560eaad4 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -22,11 +22,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.NoSuchElementException; import java.util.Objects; 
import java.util.Set; import java.util.stream.Collectors; @@ -61,163 +63,258 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; /** - * In Apache Cassandra, tuning the repair ranges has three main goals: - *

- * 1. **Create smaller, consistent repair times**: Long repairs, such as those lasting 15 hours, can be problematic. - * If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact - * of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration - * becoming the main bottleneck. - *

- * 2. **Minimize the impact on hosts**: Repairs should not heavily affect the host systems. For incremental repairs, - * this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide - * partitions—can lead to issues with disk usage and higher compaction costs. - *

- * 3. **Reduce overstreaming**: The Merkle tree, which represents data within each partition and range, has a maximum size. - * If a repair covers too many partitions, the tree’s leaves represent larger data ranges. Even a small change in a leaf - * can trigger excessive data streaming, making the process inefficient. - *

- * Additionally, if there are many small tables, it's beneficial to batch these tables together under a single parent repair. - * This prevents the repair overhead from becoming a bottleneck, especially when dealing with hundreds of tables. Running - * individual repairs for each table can significantly impact performance and efficiency. - *

- * To manage these issues, the strategy involves estimating the size and number of partitions within a range and splitting - * it accordingly to bound the size of the range splits. + * In Apache Cassandra, tuning repair ranges has four main goals: + *

    + *
  1. + * Create smaller, consistent repair times: Long repairs, such as those lasting 15 hours, can be problematic. + * If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact + * of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration + * becoming the main bottleneck. + *
  2. + *
  3. + * Minimize the impact on hosts: Repairs should not heavily affect the host systems. For incremental repairs, + * this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide + * partitions—can lead to issues with disk usage and higher compaction costs. + *
  4. + *
  5. + * Reduce overstreaming: The Merkle tree, which represents data within each partition and range, + * has a maximum size. If a repair covers too many partitions, the tree’s leaves represent larger data ranges. + * Even a small change in a leaf can trigger excessive data streaming, making the process inefficient. + *
  6. + *
  7. + * Reduce number of repairs: If there are many small tables, it's beneficial to batch these tables together + * under a single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when + * dealing with hundreds of tables. Running individual repairs for each table can significantly impact performance + * and efficiency. + *
  8. + *
+ * To manage these issues, the strategy involves estimating the size and number of partitions within a range and + * splitting it accordingly to bound the size of the range splits. This is established by iterating over SSTable + * index files to estimate the amount of bytes and partitions involved in the ranges being repaired and by what + * repair type is being invoked. + *

+ * While this splitter has a lot of tuning parameters, the expectation is that the established default configuration + * shall be sensible for all {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}'s. The following + * configuration parameters are offered. + *

    + *
  • + * bytes_per_assignment: The target and maximum amount of bytes that should be included in a repair + * assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this + * involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being + * repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction + * that is expected. For all other repair types, this involves the amount of data covering the range being + * repaired. + *
  • + *
  • + * partitions_per_assignment: The target number of partitions that should be included in a repair + * assignment. This configuration exists to reduce excessive overstreaming. + *
  • + *
  • + * max_tables_per_assignment: The maximum number of tables that can be included in a repair assignment. + * This aims to reduce the number of repairs, especially in cases where a large number of tables exist for + * a keyspace. Note that the splitter will avoid batching tables together if they exceed the other + * configuration parameters such as bytes_per_assignment and partitions_per_assignment. + *
  • + *
  • + * max_bytes_per_schedule: The maximum number of bytes to cover in an individual schedule. This serves + * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to + * reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if + * writes outpace the amount of data being repaired. Alternatively, one may want to tune + * the min_repair_interval down or up. + *
  • + *
+ * Given the impact of what each repair type accomplishes, different defaults are established per repair type. + *
    + *
  • + * full: Configured in a way that attempts to accomplish repairing all data in a schedule, with + * individual repairs targeting at most 200GiB of data and 1048576 partitions. + *
      + *
    • bytes_per_assignment: 200GiB
    • + *
    • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
    • + *
    • max_tables_per_assignment: 64
    • + *
    • max_bytes_per_schedule: 100TiB
    • + *
    + *
  • + *
  • + * incremental: Configured in a way that attempts to repair 50GiB of data per repair, and 500GiB per + * schedule. This attempts to throttle the amount of IR and anticompaction done per schedule after turning + * incremental on for the first time. You may want to consider increasing max_bytes_per_schedule + * if more than this much data is written per min_repair_interval. + *
      + *
    • bytes_per_assignment: 50GiB
    • + *
    • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
    • + *
    • max_tables_per_assignment: 64
    • + *
    • max_bytes_per_schedule: 500GiB
    • + *
    + *
  • + *
  • + * preview_repaired: Configured in a way that attempts to accomplish previewing all data in a schedule, + * with previews targeting at most 200GiB of data and 1048576 partitions. + *
      + *
    • bytes_per_assignment: 200GiB
    • + *
    • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
    • + *
    • max_tables_per_assignment: 64
    • + *
    • max_bytes_per_schedule: 100TiB
    • + *
    + *
  • + *
*/ - -public class RepairRangeSplitter implements IAutoRepairTokenRangeSplitter +public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { - private static final Logger logger = LoggerFactory.getLogger(RepairRangeSplitter.class); + private static final Logger logger = LoggerFactory.getLogger(RepairTokenRangeSplitter.class); + + // Default max bytes to 100TiB, which is much more readable than Long.MAX_VALUE + private static final DataStorageSpec.LongBytesBound MAX_BYTES = new DataStorageSpec.LongBytesBound(100_000, DataStorageSpec.DataStorageUnit.GIBIBYTES); - static final String SUBRANGE_SIZE = "bytes_per_assignment"; - static final String PARTITION_COUNT = "partitions_per_assignment"; - static final String TABLE_BATCH_LIMIT = "max_tables_per_assignment"; + static final String BYTES_PER_ASSIGNMENT = "bytes_per_assignment"; + static final String PARTITIONS_PER_ASSIGNMENT = "partitions_per_assignment"; + static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; - private final int tablesPerAssignmentLimit; - private final long maxBytesPerSchedule; - private final long bytesPerSubrange; - private final long partitionsPerSubrange; + static final List PARAMETERS = List.of(BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE); + + private final AutoRepairConfig.RepairType repairType; + + private final Map givenParameters = new HashMap<>(); - private static final long DEFAULT_SUBRANGE_SIZE = new DataStorageSpec.LongBytesBound("100GiB").toBytes(); - private static final long DEFAULT_MAX_BYTES_PER_SCHEDULE = Long.MAX_VALUE; - private static final long DEFAULT_PARTITION_LIMIT = (long) Math.pow(2, DatabaseDescriptor.getRepairSessionMaxTreeDepth()); - private static final int DEFAULT_TABLE_BATCH_LIMIT = 64; + private DataStorageSpec.LongBytesBound bytesPerAssignment; + private long partitionsPerAssignment; + private int 
maxTablesPerAssignment; + private DataStorageSpec.LongBytesBound maxBytesPerSchedule; - public RepairRangeSplitter(Map parameters) + /** + * Established default for each {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}, meant to + * choose sensible defaults for each. + *

+ * Defaults if not specified for the given repair type: + *

  • + *
      bytes_per_assignment: 200GiB
    + *
      partitions_per_assignment: 2^repair_session_max_tree_depth
    + *
      max_tables_per_assignment: 64
    + *
      max_bytes_per_schedule: 100000GiB (roughly 100TiB)
    + *
  • + * It's expected that these defaults should work well for everything except incremental, so we confine + * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 500GiB. This should strike a good balance + * between the amount of data that will be repaired during an initial migration to incremental repair and should + * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 500GiB of + * data is written to a node per min_repair_interval. + */ + private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ + put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) + .build()); + // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. + put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) + .withBytesPerAssignment(new DataStorageSpec.LongBytesBound("50GiB")) + .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("500GiB")) + .build()); + put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.PREVIEW_REPAIRED) + .build()); + }}; + + + public RepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map parameters) { - if (parameters.containsKey(SUBRANGE_SIZE)) + this.repairType = repairType; + this.givenParameters.putAll(parameters); + RepairTypeDefaults defaults = DEFAULTS_BY_REPAIR_TYPE.get(repairType); + + if (parameters.containsKey(BYTES_PER_ASSIGNMENT)) { - bytesPerSubrange = new DataStorageSpec.LongBytesBound(parameters.get(SUBRANGE_SIZE)).toBytes(); + bytesPerAssignment = new DataStorageSpec.LongBytesBound(parameters.get(BYTES_PER_ASSIGNMENT)); } else { - bytesPerSubrange = DEFAULT_SUBRANGE_SIZE; + bytesPerAssignment = defaults.bytesPerAssignment; } - if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) + if 
(parameters.containsKey(PARTITIONS_PER_ASSIGNMENT)) { - maxBytesPerSchedule = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)).toBytes(); + partitionsPerAssignment = Long.parseLong(parameters.get(PARTITIONS_PER_ASSIGNMENT)); } else { - maxBytesPerSchedule = DEFAULT_MAX_BYTES_PER_SCHEDULE; + partitionsPerAssignment = defaults.partitionsPerAssignment; } - if (parameters.containsKey(PARTITION_COUNT)) + if (parameters.containsKey(MAX_TABLES_PER_ASSIGNMENT)) { - partitionsPerSubrange = Long.parseLong(parameters.get(PARTITION_COUNT)); + maxTablesPerAssignment = Integer.parseInt(parameters.get(MAX_TABLES_PER_ASSIGNMENT)); } - else - { - partitionsPerSubrange = DEFAULT_PARTITION_LIMIT; + else{ + maxTablesPerAssignment = defaults.maxTablesPerAssignment; } - if (parameters.containsKey(TABLE_BATCH_LIMIT)) + if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) { - tablesPerAssignmentLimit = Integer.parseInt(parameters.get(TABLE_BATCH_LIMIT)); + maxBytesPerSchedule = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)); } else { - tablesPerAssignmentLimit = DEFAULT_TABLE_BATCH_LIMIT; + maxBytesPerSchedule = defaults.maxBytesPerSchedule; } - if (bytesPerSubrange > maxBytesPerSchedule) + + if (bytesPerAssignment.toBytes() > maxBytesPerSchedule.toBytes()) { - throw new IllegalArgumentException(String.format("%s='%s' cannot be greater than %s='%s'", - SUBRANGE_SIZE, - FileUtils.stringifyFileSize(bytesPerSubrange), + throw new IllegalArgumentException(String.format("%s='%s' cannot be greater than %s='%s' for %s", + BYTES_PER_ASSIGNMENT, + bytesPerAssignment.toBytes(), MAX_BYTES_PER_SCHEDULE, - FileUtils.stringifyFileSize(maxBytesPerSchedule))); + maxBytesPerSchedule, + repairType.getConfigName())); } - logger.info("Configured {} with {}={}, {}={}, {}={}, {}={}", RepairRangeSplitter.class.getName(), - SUBRANGE_SIZE, FileUtils.stringifyFileSize(bytesPerSubrange), - MAX_BYTES_PER_SCHEDULE, FileUtils.stringifyFileSize(maxBytesPerSchedule), 
- PARTITION_COUNT, partitionsPerSubrange, TABLE_BATCH_LIMIT, tablesPerAssignmentLimit); + logger.info("Configured {}[{}] with {}={}, {}={}, {}={}, {}={}", RepairTokenRangeSplitter.class.getName(), + repairType.getConfigName(), + BYTES_PER_ASSIGNMENT, bytesPerAssignment, + PARTITIONS_PER_ASSIGNMENT, partitionsPerAssignment, + MAX_TABLES_PER_ASSIGNMENT, maxTablesPerAssignment, + MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule); } - @Override - public Iterator getRepairAssignments(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) + public AutoRepairConfig.RepairType getRepairType() { - return new RepairAssignmentIterator(repairType, primaryRangeOnly, repairPlans); + return repairType; } - private class RepairAssignmentIterator implements Iterator - { - private final AutoRepairConfig.RepairType repairType; - private final boolean primaryRangeOnly; + @Override + public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) + { + return new BytesBasedRepairAssignmentIterator(primaryRangeOnly, repairPlans); + } - private final Iterator repairPlanIterator; + /** + * A custom {@link RepairAssignmentIterator} that confines the number of repair assignments to + * max_bytes_per_schedule. 
+ */ + private class BytesBasedRepairAssignmentIterator extends RepairAssignmentIterator { - private Iterator currentIterator = null; - private PrioritizedRepairPlan currentPlan = null; + private final boolean primaryRangeOnly; private long bytesSoFar = 0; - RepairAssignmentIterator(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, List repairPlans) + BytesBasedRepairAssignmentIterator(boolean primaryRangeOnly, List repairPlans) { - this.repairType = repairType; + super(repairPlans); this.primaryRangeOnly = primaryRangeOnly; - this.repairPlanIterator = repairPlans.iterator(); - } - - private synchronized Iterator currentIterator() - { - if (currentIterator == null || !currentIterator.hasNext()) - { - // Advance the repair plan iterator if the current repair plan is exhausted, but only - // if there are more repair plans. - if (repairPlanIterator.hasNext()) - { - currentPlan = repairPlanIterator.next(); - currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); - } - } - - return currentIterator; - } - - @Override - public boolean hasNext() - { - return currentIterator().hasNext(); } @Override - public KeyspaceRepairAssignments next() + KeyspaceRepairAssignments nextInternal(int priority, KeyspaceRepairPlan repairPlan) { - // Should not happen unless violating the contract of iterator of checking hasNext first. - if (!currentIterator.hasNext()) + // short circuit if we've accumulated too many bytes by returning a KeyspaceRepairAssignments with + // no assignments. We do this rather than returning false in hasNext() because we want to signal + // to AutoRepair that a keyspace generated no assignments. 
+ if (bytesSoFar >= maxBytesPerSchedule.toBytes()) { - throw new NoSuchElementException("No remaining repair plans"); + return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), Collections.emptyList()); } - final KeyspaceRepairPlan repairPlan = currentIterator().next(); Collection> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); List repairAssignments = getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRanges); - FilteredRepairAssignments filteredRepairAssignments = filterRepairAssignments(repairType, currentPlan.getPriority(), repairPlan.getKeyspaceName(), repairAssignments, bytesSoFar); + FilteredRepairAssignments filteredRepairAssignments = filterRepairAssignments(priority, repairPlan.getKeyspaceName(), repairAssignments, bytesSoFar); bytesSoFar = filteredRepairAssignments.newBytesSoFar; - return new KeyspaceRepairAssignments(currentPlan.getPriority(), repairPlan.getKeyspaceName(), filteredRepairAssignments.repairAssignments); + return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), filteredRepairAssignments.repairAssignments); } } @@ -245,7 +342,7 @@ List getRepairAssignmentsForKeyspace(AutoRepairConfig.Rep for (String tableName : tableNames) { - List tableAssignments = getRepairAssignmentsForTable(repairType, keyspaceName, tableName, tokenRanges); + List tableAssignments = getRepairAssignmentsForTable(keyspaceName, tableName, tokenRanges); if (tableAssignments.isEmpty()) continue; @@ -257,13 +354,13 @@ List getRepairAssignmentsForKeyspace(AutoRepairConfig.Rep } // If the table assignments are for the same token range, and we have room to add more tables to the current assignment else if (tableAssignments.size() == 1 && - currentAssignments.size() < tablesPerAssignmentLimit && - (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) + currentAssignments.size() < 
maxTablesPerAssignment && + (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) { long currentAssignmentsBytes = getEstimatedBytes(currentAssignments); long tableAssignmentsBytes = getEstimatedBytes(tableAssignments); // only add assignments together if they don't exceed max bytes per schedule. - if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule) { + if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule.toBytes()) { currentAssignments.addAll(tableAssignments); } else @@ -291,13 +388,12 @@ else if (tableAssignments.size() == 1 && /** * Given a repair type and map of sized-based repair assignments, confine them by maxBytesPerSchedule. - * @param repairType used to determine underyling table priorities * @param repairAssignments the assignments to filter. * @param bytesSoFar repair assignment bytes accumulated so far. * @return A list of repair assignments confined by maxBytesPerSchedule. */ @VisibleForTesting - FilteredRepairAssignments filterRepairAssignments(AutoRepairConfig.RepairType repairType, int priority, String keyspaceName, List repairAssignments, long bytesSoFar) + FilteredRepairAssignments filterRepairAssignments(int priority, String keyspaceName, List repairAssignments, long bytesSoFar) { // Confine repair assignments by maxBytesPerSchedule. long bytesSoFarThisIteration = 0L; @@ -311,7 +407,7 @@ FilteredRepairAssignments filterRepairAssignments(AutoRepairConfig.RepairType re { totalAssignments++; // skip any repair assignments that would accumulate us past the maxBytesPerSchedule - if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule) + if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule.toBytes()) { // log that repair assignment was skipped. 
bytesNotRepaired += repairAssignment.getEstimatedBytes(); @@ -379,11 +475,12 @@ private FilteredRepairAssignments(List repairAssignments, long } } - private String getBytesOfMaxBytesPerSchedule(long bytes) { - if (maxBytesPerSchedule == Long.MAX_VALUE) + private String getBytesOfMaxBytesPerSchedule(long bytes) + { + if (maxBytesPerSchedule.equals(MAX_BYTES)) return FileUtils.stringifyFileSize(bytes); else - return String.format("%s of %s", FileUtils.stringifyFileSize(bytes), FileUtils.stringifyFileSize(maxBytesPerSchedule)); + return String.format("%s of %s", FileUtils.stringifyFileSize(bytes), maxBytesPerSchedule); } /** @@ -427,9 +524,9 @@ static SizedRepairAssignment merge(List assignments) } @VisibleForTesting - protected List getRepairAssignmentsForTable(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Collection> tokenRanges) + protected List getRepairAssignmentsForTable(String keyspaceName, String tableName, Collection> tokenRanges) { - List sizeEstimates = getRangeSizeEstimate(repairType, keyspaceName, tableName, tokenRanges); + List sizeEstimates = getRangeSizeEstimate(keyspaceName, tableName, tokenRanges); return getRepairAssignments(sizeEstimates); } @@ -461,7 +558,7 @@ protected List getRepairAssignments(List si else { // Check if the estimate needs splitting based on the criteria - boolean needsSplitting = estimate.sizeForRepair > bytesPerSubrange || estimate.partitions > partitionsPerSubrange; + boolean needsSplitting = estimate.sizeForRepair > bytesPerAssignment.toBytes() || estimate.partitions > partitionsPerAssignment; if (needsSplitting) { int numberOfSplits = calculateNumberOfSplits(estimate); @@ -491,8 +588,8 @@ protected List getRepairAssignments(List si private int calculateNumberOfSplits(SizeEstimate estimate) { // Calculate the number of splits needed for size and partitions - int splitsForSize = (int) Math.ceil((double) estimate.sizeForRepair / bytesPerSubrange); - int splitsForPartitions = (int) 
Math.ceil((double) estimate.partitions / partitionsPerSubrange); + int splitsForSize = (int) Math.ceil((double) estimate.sizeForRepair / bytesPerAssignment.toBytes()); + int splitsForPartitions = (int) Math.ceil((double) estimate.partitions / partitionsPerAssignment); // Split the token range into subranges based on whichever (partitions, bytes) would generate the most splits. boolean splitBySize = splitsForSize > splitsForPartitions; @@ -536,7 +633,7 @@ private Collection> getTokenRanges(boolean primaryRangeOnly, String return ranges; } - private List getRangeSizeEstimate(AutoRepairConfig.RepairType repairType, String keyspace, String table, Collection> tokenRanges) + private List getRangeSizeEstimate(String keyspace, String table, Collection> tokenRanges) { List sizeEstimates = new ArrayList<>(); for (Range tokenRange : tokenRanges) @@ -638,6 +735,111 @@ static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repa return refs; } + @Override + public void setParameter(String key, String value) + { + switch (key) + { + case BYTES_PER_ASSIGNMENT: + setBytesPerAssignment(new DataStorageSpec.LongBytesBound(value)); + givenParameters.put(key, value); + return; + case PARTITIONS_PER_ASSIGNMENT: + setPartitionsPerAssignment(Long.parseLong(value)); + givenParameters.put(key, value); + return; + case MAX_TABLES_PER_ASSIGNMENT: + setMaxTablesPerAssignment(Integer.parseInt(value)); + givenParameters.put(key, value); + return; + case MAX_BYTES_PER_SCHEDULE: + setMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound(value)); + givenParameters.put(key, value); + return; + default: + throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be one of " + + PARAMETERS); + } + } + + @Override + public Map getParameters() + { + final Map parameters = new LinkedHashMap<>(); + for (String parameter : PARAMETERS) + { + // Use the parameter as configured if it exists, user would likely to prefer to see something more readable. 
+ if (givenParameters.containsKey(parameter)) + { + parameters.put(parameter, givenParameters.get(parameter)); + continue; + } + + switch (parameter) + { + case BYTES_PER_ASSIGNMENT: + parameters.put(parameter, getBytesPerAssignment().toString()); + continue; + case PARTITIONS_PER_ASSIGNMENT: + parameters.put(parameter, Long.toString(getPartitionsPerAssignment())); + continue; + case MAX_TABLES_PER_ASSIGNMENT: + parameters.put(parameter, Integer.toString(getMaxTablesPerAssignment())); + continue; + case MAX_BYTES_PER_SCHEDULE: + parameters.put(parameter, getMaxBytesPerSchedule().toString()); + continue; + default: + // not expected + parameters.put(parameter, ""); + } + } + return parameters; + } + + public DataStorageSpec.LongBytesBound getBytesPerAssignment() + { + return bytesPerAssignment; + } + + public void setBytesPerAssignment(DataStorageSpec.LongBytesBound bytesPerAssignment) + { + this.bytesPerAssignment = bytesPerAssignment; + } + + public long getPartitionsPerAssignment() + { + return partitionsPerAssignment; + } + + public void setPartitionsPerAssignment(long partitionsPerAssignment) + { + this.partitionsPerAssignment = partitionsPerAssignment; + } + + public int getMaxTablesPerAssignment() + { + return maxTablesPerAssignment; + } + + public void setMaxTablesPerAssignment(int maxTablesPerAssignment) + { + this.maxTablesPerAssignment = maxTablesPerAssignment; + } + + public DataStorageSpec.LongBytesBound getMaxBytesPerSchedule() + { + return maxBytesPerSchedule; + } + + public void setMaxBytesPerSchedule(DataStorageSpec.LongBytesBound maxBytesPerSchedule) + { + this.maxBytesPerSchedule = maxBytesPerSchedule; + } + + /** + * Represents a size estimate by both bytes and partition count for a given keyspace and table for a token range. + */ @VisibleForTesting protected static class SizeEstimate { @@ -754,4 +956,79 @@ public String toString() '}'; } } + + /** + * Conveinence builder for establishing defaults by repair type. 
+ */ + protected static class RepairTypeDefaults + { + final AutoRepairConfig.RepairType repairType; + final DataStorageSpec.LongBytesBound bytesPerAssignment; + final long partitionsPerAssignment; + final int maxTablesPerAssignment; + final DataStorageSpec.LongBytesBound maxBytesPerSchedule; + + public RepairTypeDefaults(AutoRepairConfig.RepairType repairType, + DataStorageSpec.LongBytesBound bytesPerAssignment, + long partitionsPerAssignment, + int maxTablesPerAssignment, + DataStorageSpec.LongBytesBound maxBytesPerSchedule) + { + this.repairType = repairType; + this.bytesPerAssignment = bytesPerAssignment; + this.partitionsPerAssignment = partitionsPerAssignment; + this.maxTablesPerAssignment = maxTablesPerAssignment; + this.maxBytesPerSchedule = maxBytesPerSchedule; + } + + static RepairTypeDefaultsBuilder builder(AutoRepairConfig.RepairType repairType) + { + return new RepairTypeDefaultsBuilder(repairType); + } + + static class RepairTypeDefaultsBuilder + { + private final AutoRepairConfig.RepairType repairType; + private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("200GiB"); + private long partitionsPerAssignment = (long) Math.pow(2, DatabaseDescriptor.getRepairSessionMaxTreeDepth()); + private int maxTablesPerAssignment = 64; + private DataStorageSpec.LongBytesBound maxBytesPerSchedule = MAX_BYTES; + + private RepairTypeDefaultsBuilder(AutoRepairConfig.RepairType repairType) + { + this.repairType = repairType; + } + + public RepairTypeDefaultsBuilder withBytesPerAssignment(DataStorageSpec.LongBytesBound bytesPerAssignment) + { + this.bytesPerAssignment = bytesPerAssignment; + return this; + } + + @SuppressWarnings("unused") + public RepairTypeDefaultsBuilder withPartitionsPerAssignment(long partitionsPerAssignment) + { + this.partitionsPerAssignment = partitionsPerAssignment; + return this; + } + + @SuppressWarnings("unused") + public RepairTypeDefaultsBuilder withMaxTablesPerAssignment(int maxTablesPerAssignment) + 
{ + this.maxTablesPerAssignment = maxTablesPerAssignment; + return this; + } + + public RepairTypeDefaultsBuilder withMaxBytesPerSchedule(DataStorageSpec.LongBytesBound maxBytesPerSchedule) + { + this.maxBytesPerSchedule = maxBytesPerSchedule; + return this; + } + + public RepairTokenRangeSplitter.RepairTypeDefaults build() + { + return new RepairTypeDefaults(repairType, bytesPerAssignment, partitionsPerAssignment, maxTablesPerAssignment, maxBytesPerSchedule); + } + } + } } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 167e4507a7c1..1a9c010928a7 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -27,6 +27,7 @@ import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; @@ -107,12 +108,6 @@ public void setForceRepairForHosts(RepairType repairType, Set getOnGoingRepairHostIds(RepairType rType) } return hostIds; } + + @Override + public Map getAutoRepairTokenRangeSplitterParameters(RepairType repairType) + { + return config.getTokenRangeSplitterInstance(repairType).getParameters(); + } + + @Override + public void setAutoRepairTokenRangeSplitterParameter(RepairType repairType, String key, String value) + { + config.getTokenRangeSplitterInstance(repairType).setParameter(key, value); + } } diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index cac2680c3f01..e69a45482bc0 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -21,6 +21,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import java.util.Map; import java.util.Set; public interface 
AutoRepairServiceMBean @@ -38,8 +39,6 @@ public interface AutoRepairServiceMBean public Set getRepairHostPriority(RepairType repairType); - public void setRepairSubRangeNum(RepairType repairType, int repairSubRangeNum); - public void setRepairMinInterval(RepairType repairType, String minRepairInterval); void startScheduler(); @@ -71,4 +70,8 @@ public interface AutoRepairServiceMBean public void setRepairSessionTimeout(RepairType repairType, String timeout); public Set getOnGoingRepairHostIds(RepairType rType); + + public Map getAutoRepairTokenRangeSplitterParameters(RepairType repairType); + + public void setAutoRepairTokenRangeSplitterParameter(AutoRepairConfig.RepairType repairType, String key, String value); } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 9a945f406d18..8f606900022a 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2568,6 +2568,16 @@ public AutoRepairConfig getAutoRepairConfig() { return autoRepairProxy.getAutoRepairConfig(); } + public Map getAutoRepairTokenRangeSplitterParameters(AutoRepairConfig.RepairType repairType) + { + return autoRepairProxy.getAutoRepairTokenRangeSplitterParameters(repairType); + } + + public void setAutoRepairTokenRangeSplitterParameter(AutoRepairConfig.RepairType repairType, String key, String value) + { + autoRepairProxy.setAutoRepairTokenRangeSplitterParameter(repairType, key, value); + } + public void setAutoRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) { autoRepairProxy.setAutoRepairEnabled(repairType, enabled); @@ -2592,11 +2602,6 @@ public void setForceRepairForHosts(AutoRepairConfig.RepairType repairType, Set priorityHosts = probe.getRepairPriorityForHosts(repairType); + if (!priorityHosts.isEmpty()) + { + appendConfig(sb, "priority_hosts", Joiner.on(',').skipNulls().join(priorityHosts)); + } + + appendConfig(sb , "min_repair_interval", 
config.getRepairMinInterval(repairType)); + appendConfig(sb , "repair_by_keyspace", config.getRepairByKeyspace(repairType)); + appendConfig(sb , "number_of_repair_threads", config.getRepairThreads(repairType)); + appendConfig(sb , "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); + appendConfig(sb , "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); + appendConfig(sb , "ignore_dcs", config.getIgnoreDCs(repairType)); + appendConfig(sb , "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); + appendConfig(sb , "parallel_repair_count", config.getParallelRepairCount(repairType)); + appendConfig(sb , "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); + appendConfig(sb , "mv_repair_enabled", config.getMVRepairEnabled(repairType)); + appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); + appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); + appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); + + final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); + final String splitterClassName = splitterClass.class_name != null ? 
splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); + appendConfig(sb, "token_range_splitter", splitterClassName); + Map tokenRangeSplitterParameters = probe.getAutoRepairTokenRangeSplitterParameters(repairType); + if (!tokenRangeSplitterParameters.isEmpty()) + { + for (Map.Entry param : tokenRangeSplitterParameters.entrySet()) + { + appendConfig(sb, String.format("token_range_splitter.%s", param.getKey()), param.getValue()); + } + } + } return sb.toString(); } + + private void appendConfig(StringBuilder sb, String config, T value) + { + sb.append(String.format("%s%s: %s", "\n\t", config, value)); + } } diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 75cdea6cd1c0..d7f36f801613 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -27,6 +27,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; +import org.apache.cassandra.utils.LocalizeString; import java.io.PrintStream; import java.net.UnknownHostException; @@ -43,21 +44,23 @@ public class SetAutoRepairConfig extends NodeToolCmd @VisibleForTesting @Arguments(title = " ", usage = " ", description = "autorepair param and value.\nPossible autorepair parameters are as following: " + - "[start_scheduler|number_of_repair_threads|number_of_subranges|min_repair_interval|sstable_upper_threshold" + + "[start_scheduler|number_of_repair_threads|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries" + - 
"|repair_retry_backoff|repair_session_timeout|min_repair_task_duration]", + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration|token_range_splitter.]", required = true) protected List args = new ArrayList<>(); @VisibleForTesting @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") - protected RepairType repairType; + protected String repairTypeStr; @VisibleForTesting protected PrintStream out = System.out; + private static final String TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX = "token_range_splitter."; + @Override public void execute(NodeProbe probe) { @@ -98,7 +101,16 @@ public void execute(NodeProbe probe) } // options below require --repair-type option - checkArgument(repairType != null, "--repair-type is required for this parameter."); + checkArgument(repairTypeStr != null, "--repair-type is required for this parameter."); + final RepairType repairType = RepairType.valueOf(LocalizeString.toUpperCaseLocalized(repairTypeStr)); + + if(paramType.startsWith(TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX)) + { + final String key = paramType.replace(TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX, ""); + probe.setAutoRepairTokenRangeSplitterParameter(repairType, key, paramVal); + return; + } + Set hosts; switch (paramType) { @@ -108,9 +120,6 @@ public void execute(NodeProbe probe) case "number_of_repair_threads": probe.setRepairThreads(repairType, Integer.parseInt(paramVal)); break; - case "number_of_subranges": - probe.setRepairSubRangeNum(repairType, Integer.parseInt(paramVal)); - break; case "min_repair_interval": probe.setRepairMinInterval(repairType, paramVal); break; diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 2be783fe4a9d..9d55b4ca8280 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -66,9 +66,7 @@ import com.google.common.base.Suppliers; import 
com.google.common.collect.ImmutableList; -import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -732,32 +730,6 @@ public static AbstractCryptoProvider newCryptoProvider(String className, Map tokenRangeSplitterClass = Class.forName(className); - try - { - Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); - // first attempt to initialize with Map arguments. - return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(Map.class).newInstance(parameters); - } - catch (NoSuchMethodException nsme) - { - // fall back on no argument constructor. - return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); - } - } - catch (Exception ex) - { - throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter for " + className, ex); - } - } /** * @return The Class for the given name. 
diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index caf1ad7c3ea0..67ba2b6009f1 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -75,6 +75,8 @@ public static void init() throws IOException "parallel_repair_percentage", "0", "min_repair_interval", "1s")))) .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_by_keyspace", "true") + .set("auto_repair.repair_task_min_duration", "0s") .set("auto_repair.repair_check_interval", "10s")).start(); cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); @@ -124,12 +126,8 @@ private void validate(String repairType) throws ParseException sdf.parse(row[2].toString()); sdf.parse(row[3].toString()); // the reason why the repair was scheduled + Assert.assertNotNull(row[4]); Assert.assertEquals("MY_TURN", row[4].toString()); - for (Object col : row) - { - System.out.println("Data:" + col); - } - System.out.println("====================================="); } } } diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index d62e88091635..d44dea211548 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -116,12 +116,14 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$TombstonesMetricGranularity", "org.apache.cassandra.repair.autorepair.AutoRepairConfig", "org.apache.cassandra.repair.autorepair.AutoRepairConfig$Options", - "org.apache.cassandra.repair.autorepair.DefaultAutoRepairTokenSplitter", 
- "org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter", - "org.apache.cassandra.repair.autorepair.FullRepairState", - "org.apache.cassandra.repair.autorepair.IncrementalRepairState", "org.apache.cassandra.repair.autorepair.AutoRepairConfig$RepairType", "org.apache.cassandra.repair.autorepair.AutoRepairState", + "org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter", + "org.apache.cassandra.repair.autorepair.FullRepairState", + "org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter", + "org.apache.cassandra.repair.autorepair.IncrementalRepairState", + "org.apache.cassandra.repair.autorepair.PreviewRepairedState", + "org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter", "org.apache.cassandra.config.DatabaseDescriptor$ByteUnit", "org.apache.cassandra.config.DataRateSpec", "org.apache.cassandra.config.DataRateSpec$DataRateUnit", diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java similarity index 74% rename from test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java rename to test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java index 76aded0e0476..1b66086a4b71 100644 --- a/test/unit/org/apache/cassandra/db/streaming/CassandraSreamReceiverTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java @@ -22,7 +22,6 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.cql3.CQLTester; @@ -38,7 +37,7 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.when; -public class CassandraSreamReceiverTest extends CQLTester +public class CassandraStreamReceiverTest extends CQLTester { @Mock private StreamSession session; @@ -48,15 +47,15 @@ public class CassandraSreamReceiverTest extends CQLTester 
private static final String CDC_MV_TABLE = "cdc_mv_table"; private static final String NO_CDC_MV_TABLE = "no_cdc_mv_table"; - @BeforeClass - public static void beforeClass() - { - DatabaseDescriptor.setCDCOnRepairEnabled(false); - } - @Before public void setup() { + // Set cdc_on_repair_enabled and materialized_views_on_repair_enabled to true + DatabaseDescriptor.setCDCOnRepairEnabled(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + // Enable materialized views + DatabaseDescriptor.setMaterializedViewsEnabled(true); + MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=true", KEYSPACE, CDC_TABLE)); QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=false", KEYSPACE, MV_TABLE)); @@ -69,26 +68,35 @@ public void setup() @Test public void testRequiresWritePathRepair() { + // given a CDC table with a materialized view attached to it. ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_MV_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + // Should require write path since cdc_on_repair_enabled and materialized_views_on_repair_enabled are both true. assertTrue(receiver.requiresWritePath(cfs)); } @Test public void testRequiresWritePathBulkLoad() { + // given a CDC table with a materialized view attached to it. ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_MV_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + // Should require write path since cdc_on_repair_enabled and materialized_views_on_repair_enabled are both true.
assertTrue(receiver.requiresWritePath(cfs)); } @Test - public void testRequiresWritePathNoCDCOrMV() + public void testDoesNotRequireWritePathNoCDCOrMV() { + // Given cdc_on_repair_enabled and materialized_views_on_repair_enabled are false + // requiresWritePath should still return false for a non-CDC table. + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(NO_CDC_MV_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); @@ -99,7 +107,8 @@ public void testRequiresWritePathNoCDCOrMV() @Test public void testRequiresWritePathRepairMVOnly() { - // Access the private field using reflection + // Given cdc_on_repair_enabled and materialized_views_on_repair_enabled are true + // requiresWritePath should return true for a table with materialized views. ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); @@ -108,8 +117,10 @@ public void testRequiresWritePathRepairMVOnly() } @Test - public void testRequiresWritePathRepairCDCWithSystemProp() + public void testRequiresWritePathRepairCDCOnRepairEnabled() { + // Given cdc_on_repair_enabled and materialized_views_on_repair_enabled are true + // requiresWritePath should return true for a table with CDC enabled.
ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); @@ -118,16 +129,22 @@ public void testRequiresWritePathRepairCDCWithSystemProp() } @Test - public void testDoesNotRequiresWritePathRepairCDCOnly() + public void testDoesNotRequireWritePathRepairCDCOnRepairEnabledFalse() { + // Given cdc_on_repair_enabled and materialized_views_on_repair_enabled are false + // requiresWritePath should return false for a table with CDC enabled. + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE); when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); CassandraStreamReceiver receiver1 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); assertFalse(receiver1.requiresWritePath(cfs)); - CassandraStreamReceiver receiver2 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + // When flipping cdc_on_repair_enabled to true + // requiresWritePath should return true.
DatabaseDescriptor.setCDCOnRepairEnabled(true); + CassandraStreamReceiver receiver2 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); assertTrue(receiver2.requiresWritePath(cfs)); - } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 79b371fca347..2ba4ff9fbf3f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -35,7 +35,6 @@ import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.Options; -import org.apache.cassandra.utils.FBUtilities; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -161,24 +160,6 @@ public void testGetRepairThreads() assertEquals(5, result); } - @Test - public void testGetRepairSubRangeNum() - { - config.global_settings.number_of_subranges = 5; - - int result = config.getRepairSubRangeNum(repairType); - - assertEquals(5, result); - } - - @Test - public void testSetRepairSubRangeNum() - { - config.setRepairSubRangeNum(repairType, 5); - - assert config.getOptions(repairType).number_of_subranges == 5; - } - @Test public void testGetRepairMinFrequencyInHours() { @@ -390,16 +371,16 @@ public void testGetDefaultOptionsTokenRangeSplitter() { Options defaultOptions = Options.getDefaultOptions(); - ParameterizedClass expectedDefault = new ParameterizedClass(DefaultAutoRepairTokenSplitter.class.getName(), Collections.emptyMap()); + ParameterizedClass expectedDefault = new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()); assertEquals(expectedDefault, defaultOptions.token_range_splitter); - assertEquals(DefaultAutoRepairTokenSplitter.class.getName(), 
FBUtilities.newAutoRepairTokenRangeSplitter(defaultOptions.token_range_splitter).getClass().getName()); + assertEquals(RepairTokenRangeSplitter.class.getName(), AutoRepair.newAutoRepairTokenRangeSplitter(repairType, defaultOptions.token_range_splitter).getClass().getName()); } @Test(expected = ConfigurationException.class) public void testInvalidTokenRangeSplitter() { - FBUtilities.newAutoRepairTokenRangeSplitter(new ParameterizedClass("invalid-class", Collections.emptyMap())); + AutoRepair.newAutoRepairTokenRangeSplitter(repairType, new ParameterizedClass("invalid-class", Collections.emptyMap())); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java index 086d8c114897..f3ecaacaacfd 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Set; import java.util.Iterator; +import java.util.Map; import org.junit.BeforeClass; import org.junit.Test; @@ -109,12 +110,10 @@ public void testTokenRangesSplitByTable() } } - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setRepairSubRangeNum(repairType, numberOfSplits); - List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, plan); + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Map.of(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSplits))) + .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. 
assertTrue(keyspaceAssignments.hasNext()); @@ -162,12 +161,10 @@ public void testTokenRangesSplitByKeyspace() expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); } - AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setRepairSubRangeNum(repairType, numberOfSplits); - List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, plan); + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Map.of(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSplits))) + .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. assertTrue(keyspaceAssignments.hasNext()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 2d787bd88b72..7ed8fa3d61dd 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -21,9 +21,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -32,6 +34,7 @@ import com.google.common.collect.Sets; import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.cql3.statements.schema.TableAttributes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -219,8 +222,11 @@ private void resetConfig() config.repair_type_overrides = defaultConfig.repair_type_overrides; config.global_settings = 
defaultConfig.global_settings; config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; - config.setRepairSubRangeNum(repairType, 1); config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); + // Use fixed splitter so we get a deterministic amount of repairs. + config.setTokenRangeSplitter(repairType, + new ParameterizedClass(FixedSplitTokenRangeSplitter.class.getName(), + Map.of(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, "1"))); } private void executeCQL() @@ -574,7 +580,7 @@ public void testTokenRangesNoSplit() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE); - Iterator keyspaceAssignments = new DefaultAutoRepairTokenSplitter().getRepairAssignments(repairType, true, plan); + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.emptyMap()).getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. assertTrue(keyspaceAssignments.hasNext()); @@ -672,6 +678,7 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); + // Expect configured retries for each table expected to be repaired assertEquals(config.getRepairMaxRetries()*expectedTablesGoingThroughRepair, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java similarity index 78% rename from test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index d93a40b689f9..6741e060b219 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairRangeSplitterTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -43,21 +43,21 @@ import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigFormat; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.FilteredRepairAssignments; -import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizeEstimate; -import org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SizedRepairAssignment; +import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.FilteredRepairAssignments; +import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.SizeEstimate; +import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.SizedRepairAssignment; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.concurrent.Refs; -import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.MAX_BYTES_PER_SCHEDULE; -import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.SUBRANGE_SIZE; -import static org.apache.cassandra.repair.autorepair.RepairRangeSplitter.TABLE_BATCH_LIMIT; +import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.MAX_BYTES_PER_SCHEDULE; +import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.BYTES_PER_ASSIGNMENT; +import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.MAX_TABLES_PER_ASSIGNMENT; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class RepairRangeSplitterTest extends CQLTester +public class RepairTokenRangeSplitterTest extends CQLTester { - private RepairRangeSplitter repairRangeSplitter; + private RepairTokenRangeSplitter repairRangeSplitter; private String tableName; private static Range FULL_RANGE; @@ -74,7 +74,7 @@ public static void setUpClass() @Before public void setUp() { - 
repairRangeSplitter = new RepairRangeSplitter(Collections.emptyMap()); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); assertTrue(BigFormat.isSelected()); } @@ -83,9 +83,9 @@ public void setUp() public void testSizePartitionCount() throws Throwable { insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - Refs sstables = RepairRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); + Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); - SizeEstimate sizes = RepairRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); + SizeEstimate sizes = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); assertEquals(10, sizes.partitions); } @@ -101,10 +101,10 @@ public void testSizePartitionCountSplit() throws Throwable Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); - Refs sstables1 = RepairRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); - Refs sstables2 = RepairRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2); - SizeEstimate sizes1 = RepairRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); - SizeEstimate sizes2 = RepairRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); + Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2); + SizeEstimate sizes1 = 
RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); + SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); // +-5% because HLL merge and the applying of range size approx ratio causes estimation errors assertTrue(Math.abs(10000 - (sizes1.partitions + sizes2.partitions)) <= 60); } @@ -113,7 +113,7 @@ public void testSizePartitionCountSplit() throws Throwable public void testGetRepairAssignmentsForTable_NoSSTables() { Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); assertEquals(0, assignments.size()); } @@ -121,15 +121,15 @@ public void testGetRepairAssignmentsForTable_NoSSTables() public void testGetRepairAssignmentsForTable_Single() throws Throwable { Collection> ranges = Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); - insertAndFlushSingleTable(tableName); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(RepairType.FULL, CQLTester.KEYSPACE, tableName, ranges); + insertAndFlushSingleTable(); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); assertEquals(1, assignments.size()); } @Test public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable { - repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); Collection> ranges = 
Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); @@ -144,7 +144,7 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable @Test public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable { - repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "2")); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); @@ -158,7 +158,7 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable @Test public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable { - repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "1")); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "1")); Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); @@ -170,7 +170,7 @@ public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable @Test public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable { - repairRangeSplitter = new RepairRangeSplitter(Collections.singletonMap(TABLE_BATCH_LIMIT, "100")); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "100")); Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); @@ -184,7 +184,7 @@ public void testMergeEmptyAssignments() { // Test when the list of assignments is empty List emptyAssignments = Collections.emptyList(); - RepairRangeSplitter.merge(emptyAssignments); + RepairTokenRangeSplitter.merge(emptyAssignments); } @Test @@ -197,7 +197,7 @@ public void testMergeSingleAssignment() SizedRepairAssignment assignment = new SizedRepairAssignment(FULL_RANGE, 
keyspaceName, tableNames); List assignments = Collections.singletonList(assignment); - SizedRepairAssignment result = RepairRangeSplitter.merge(assignments); + SizedRepairAssignment result = RepairTokenRangeSplitter.merge(assignments); assertEquals(FULL_RANGE, result.getTokenRange()); assertEquals(keyspaceName, result.getKeyspaceName()); @@ -216,7 +216,7 @@ public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2); List assignments = Arrays.asList(assignment1, assignment2); - SizedRepairAssignment result = RepairRangeSplitter.merge(assignments); + SizedRepairAssignment result = RepairTokenRangeSplitter.merge(assignments); assertEquals(FULL_RANGE, result.getTokenRange()); assertEquals(keyspaceName, result.getKeyspaceName()); @@ -239,7 +239,7 @@ public void testMergeDifferentTokenRange() SizedRepairAssignment assignment2 = new SizedRepairAssignment(tokenRange2, keyspaceName, tableNames); List assignments = Arrays.asList(assignment1, assignment2); - RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException + RepairTokenRangeSplitter.merge(assignments); // Should throw IllegalStateException } @Test(expected = IllegalStateException.class) @@ -252,7 +252,7 @@ public void testMergeDifferentKeyspaceName() SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, "keyspace2", tableNames); List assignments = Arrays.asList(assignment1, assignment2); - RepairRangeSplitter.merge(assignments); // Should throw IllegalStateException + RepairTokenRangeSplitter.merge(assignments); // Should throw IllegalStateException } @Test @@ -267,7 +267,7 @@ public void testMergeWithDuplicateTables() SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2); List assignments = Arrays.asList(assignment1, assignment2); - RepairAssignment result = RepairRangeSplitter.merge(assignments); + RepairAssignment 
result = RepairTokenRangeSplitter.merge(assignments); // The merged result should contain all unique table names assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); @@ -278,12 +278,12 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte { // Ensures that getRepairAssignments splits by SUBRANGE_SIZE and filterRepairAssignments limits by MAX_BYTES_PER_SCHEDULE. Map parameters = new HashMap<>(); - parameters.put(SUBRANGE_SIZE, "50GiB"); + parameters.put(BYTES_PER_ASSIGNMENT, "50GiB"); parameters.put(MAX_BYTES_PER_SCHEDULE, "100GiB"); - repairRangeSplitter = new RepairRangeSplitter(parameters); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.INCREMENTAL, parameters); // Given a size estimate of 1024GiB, we should expect 21 splits (50GiB*21 = 1050GiB < 1024GiB) - SizeEstimate sizeEstimate = sizeEstimateByBytes(RepairType.INCREMENTAL, new LongMebibytesBound("1024GiB")); + SizeEstimate sizeEstimate = sizeEstimateByBytes(new LongMebibytesBound("1024GiB")); List assignments = repairRangeSplitter.getRepairAssignments(Collections.singletonList(sizeEstimate)); @@ -297,24 +297,24 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte } // When filtering we should only get 2 assignments back (48.76 * 2 < 100GiB) - FilteredRepairAssignments filteredRepairAssignments = repairRangeSplitter.filterRepairAssignments(RepairType.INCREMENTAL, 0, KEYSPACE, assignments, 0); + FilteredRepairAssignments filteredRepairAssignments = repairRangeSplitter.filterRepairAssignments(0, KEYSPACE, assignments, 0); List finalRepairAssignments = filteredRepairAssignments.repairAssignments; assertEquals(2, finalRepairAssignments.size()); assertEquals(expectedBytes*2, filteredRepairAssignments.newBytesSoFar); } - private SizeEstimate sizeEstimateByBytes(RepairType repairType, LongMebibytesBound totalSize) + private SizeEstimate sizeEstimateByBytes(LongMebibytesBound 
totalSize) { - return sizeEstimateByBytes(repairType, totalSize, totalSize); + return sizeEstimateByBytes(totalSize, totalSize); } - private SizeEstimate sizeEstimateByBytes(RepairType repairType, LongMebibytesBound sizeInRange, LongMebibytesBound totalSize) + private SizeEstimate sizeEstimateByBytes(LongMebibytesBound sizeInRange, LongMebibytesBound totalSize) { - return new SizeEstimate(repairType, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); + return new SizeEstimate(RepairType.INCREMENTAL, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); } - private void insertAndFlushSingleTable(String tableName) throws Throwable + private void insertAndFlushSingleTable() throws Throwable { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); flush(); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index f34e1f0a7071..c69809618216 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -68,7 +68,6 @@ public static Collection testCases() { return Stream.of( forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), - forEachRepairType(200, AutoRepairService.instance::setRepairSubRangeNum, config::getRepairSubRangeNum), forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), diff --git 
a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index e12af34af8d8..7fde3920afda 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -175,7 +175,7 @@ public void setUp() throws Exception @Test(expected = IllegalArgumentException.class) public void testNoArgs() { - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.execute(probe); } @@ -183,7 +183,7 @@ public void testNoArgs() public void testRepairSchedulingDisabled() { when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("threads", "1"); cmd.execute(probe); @@ -196,7 +196,7 @@ public void testRepairSchedulingDisabled() public void testRepairTypeDisabled() { config.setAutoRepairEnabled(repairType, false); - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("number_of_repair_threads", "1"); cmd.execute(probe); @@ -208,7 +208,7 @@ public void testRepairTypeDisabled() @Test public void testV2FlagMissing() { - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("threads", "1"); try @@ -228,17 +228,16 @@ public void testV2FlagMissing() @Test(expected = IllegalArgumentException.class) public void testInvalidParamType() { - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("unknown_type", "1"); cmd.execute(probe); } - @Test public void testPriorityHosts() { - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("priority_hosts", String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1))); cmd.execute(probe); @@ -249,7 +248,7 @@ public void 
testPriorityHosts() @Test public void testForceRepairHosts() { - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("forcerepair_hosts", String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1))); cmd.execute(probe); @@ -279,7 +278,6 @@ public static Collection testCases() return Stream.of( forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type, true)), forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type, 1)), - forEachRepairType("number_of_subranges", "2", (type) -> verify(probe, times(1)).setRepairSubRangeNum(type, 2)), forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type, "3h")), forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type, 4)), forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type, "5s")), @@ -287,7 +285,8 @@ public static Collection testCases() forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type, 6)), forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type, 7)), forEachRepairType("mv_repair_enabled", "true", (type) -> verify(probe, times(1)).setMVRepairEnabled(type, true)), - forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))) + forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))), + forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type, "max_bytes_per_schedule", "500GiB")) 
).flatMap(Function.identity()).collect(Collectors.toList()); } @@ -318,7 +317,7 @@ public void setUp() @Test public void test() { - cmd.repairType = repairType; + cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of(paramType, paramVal); cmd.execute(probe); From 94df667925b6033bb9d32d8ba75bed63533bc492 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Thu, 2 Jan 2025 15:36:57 -0600 Subject: [PATCH 114/257] JDK 8 support and test rename --- .../autorepair/FixedSplitTokenRangeSplitter.java | 6 +++++- .../autorepair/RepairTokenRangeSplitter.java | 6 +++--- ....java => FixedSplitTokenRangeSplitterTest.java} | 14 ++++++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) rename test/unit/org/apache/cassandra/repair/autorepair/{AutoRepairDefaultTokenSplitterParameterizedTest.java => FixedSplitTokenRangeSplitterTest.java} (95%) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 9fb39199d745..ede4c0c32469 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -133,7 +134,10 @@ public void setParameter(String key, String value) @Override public Map getParameters() { - return Map.of(NUMBER_OF_SUBRANGES, Integer.toString(getNumberOfSubranges())); + return Collections.unmodifiableMap(new LinkedHashMap() + {{ + put(NUMBER_OF_SUBRANGES, Integer.toString(getNumberOfSubranges())); + }}); } public void setNumberOfSubranges(int numberOfSubranges) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java 
b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index e724560eaad4..b71a6712c311 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -171,7 +171,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; - static final List PARAMETERS = List.of(BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE); + static final String[] PARAMETERS = { BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE }; private final AutoRepairConfig.RepairType repairType; @@ -199,7 +199,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 500GiB of * data is written to a node per min_repair_interval. */ - private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ + private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap(AutoRepairConfig.RepairType.class) {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. 
@@ -794,7 +794,7 @@ public Map getParameters() parameters.put(parameter, ""); } } - return parameters; + return Collections.unmodifiableMap(parameters); } public DataStorageSpec.LongBytesBound getBytesPerAssignment() diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java similarity index 95% rename from test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index f3ecaacaacfd..79ff941360c1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairDefaultTokenSplitterParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -22,9 +22,10 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Set; -import java.util.Iterator; import java.util.Map; import org.junit.BeforeClass; @@ -51,13 +52,18 @@ import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) -public class AutoRepairDefaultTokenSplitterParameterizedTest +public class FixedSplitTokenRangeSplitterTest { private static final String KEYSPACE = "ks"; private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; + private final Map splitterParams = new LinkedHashMap() + {{ + put(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(4)); + }}; + @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -112,7 +118,7 @@ public void testTokenRangesSplitByTable() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new 
FixedSplitTokenRangeSplitter(repairType, Map.of(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSplits))) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, splitterParams) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -163,7 +169,7 @@ public void testTokenRangesSplitByKeyspace() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Map.of(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSplits))) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, splitterParams) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. From 43bf6e3f57265897f906c719b3b9c016dbfa4d97 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Thu, 2 Jan 2025 17:41:18 -0600 Subject: [PATCH 115/257] Fix newAutoRepairTokenRangeSplitter duplication --- .../repair/autorepair/AutoRepair.java | 39 ------------------- .../IAutoRepairTokenRangeSplitter.java | 2 +- .../autorepair/AutoRepairConfigTest.java | 4 +- 3 files changed, 3 insertions(+), 42 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 52324f917106..738fb9148e0c 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -19,7 +19,6 @@ package org.apache.cassandra.repair.autorepair; import java.util.ArrayList; -import java.util.Collections; import java.util.EnumMap; import java.util.HashSet; import java.util.Iterator; @@ -40,7 +39,6 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.config.DurationSpec; -import 
org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; @@ -110,43 +108,6 @@ protected AutoRepair() } } - @VisibleForTesting - static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException - { - try - { - Class tokenRangeSplitterClass; - final String className; - if (parameterizedClass.class_name != null && !parameterizedClass.class_name.isEmpty()) - { - className = parameterizedClass.class_name.contains(".") ? - parameterizedClass.class_name : - "org.apache.cassandra.repair.autorepair." + parameterizedClass.class_name; - tokenRangeSplitterClass = FBUtilities.classForName(className, "token_range_splitter"); - } - else - { - // If token_range_splitter.class_name is not defined, just use default, this is for convenience. - tokenRangeSplitterClass = AutoRepairConfig.DEFAULT_SPLITTER; - } - try - { - Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); - // first attempt to initialize with RepairType and Map arguments. - return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(AutoRepairConfig.RepairType.class, Map.class).newInstance(repairType, parameters); - } - catch (NoSuchMethodException nsme) - { - // fall back on no argument constructor. 
- return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); - } - } - catch (Exception ex) - { - throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter", ex); - } - } - public void setup() { // Ensure setup is done only once; this is only for unit tests diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java index 86bac32c96bb..8b82eac296db 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -32,7 +32,7 @@ * Custom implementations class should require a constructor accepting * ({@link AutoRepairConfig.RepairType}, {@link java.util.Map}) with the {@link java.util.Map} parameter accepting * custom configuration for your splitter. If such a constructor does not exist, - * {@link AutoRepair#newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType, ParameterizedClass)} + * {@link AutoRepairConfig#newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType, ParameterizedClass)} * will fall back on invoking a default zero argument constructor. 
*/ public interface IAutoRepairTokenRangeSplitter diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 2ba4ff9fbf3f..8f02baa52f57 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -374,13 +374,13 @@ public void testGetDefaultOptionsTokenRangeSplitter() ParameterizedClass expectedDefault = new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()); assertEquals(expectedDefault, defaultOptions.token_range_splitter); - assertEquals(RepairTokenRangeSplitter.class.getName(), AutoRepair.newAutoRepairTokenRangeSplitter(repairType, defaultOptions.token_range_splitter).getClass().getName()); + assertEquals(RepairTokenRangeSplitter.class.getName(), AutoRepairConfig.newAutoRepairTokenRangeSplitter(repairType, defaultOptions.token_range_splitter).getClass().getName()); } @Test(expected = ConfigurationException.class) public void testInvalidTokenRangeSplitter() { - AutoRepair.newAutoRepairTokenRangeSplitter(repairType, new ParameterizedClass("invalid-class", Collections.emptyMap())); + AutoRepairConfig.newAutoRepairTokenRangeSplitter(repairType, new ParameterizedClass("invalid-class", Collections.emptyMap())); } @Test From 318a69ed1e585a06d02e5b84a08c29c9d9a23da9 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Fri, 3 Jan 2025 11:03:56 -0600 Subject: [PATCH 116/257] Nit-fixing in existing code * Move curly brackets to new line * Remove unused config declaration in constructor --- .../cassandra/cql3/UntypedResultSet.java | 3 +- .../repair/autorepair/AutoRepair.java | 1 - .../cassandra/service/AutoRepairService.java | 3 +- .../cassandra/service/StorageService.java | 9 ++- .../org/apache/cassandra/tools/NodeProbe.java | 12 ++- .../AutoRepairStateFactoryTest.java | 15 
++-- .../autorepair/AutoRepairStateTest.java | 79 +++++++++++++------ 7 files changed, 81 insertions(+), 41 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java index 872ba7cf0d1e..f70c1211e969 100644 --- a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java +++ b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java @@ -384,7 +384,8 @@ public long getLong(String column) // this function will return the default value if the row doesn't have that column or the column data is null // This function is used to avoid the nullpointerexception - public long getLong(String column, long ifNull) { + public long getLong(String column, long ifNull) + { ByteBuffer bytes = data.get(column); return bytes == null ? ifNull : LongType.instance.compose(bytes); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 738fb9148e0c..46e84e59f25b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -96,7 +96,6 @@ public class AutoRepair @VisibleForTesting protected AutoRepair() { - AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 1a9c010928a7..4056e6e6a930 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -98,7 +98,8 @@ public void setRepairPriorityForHosts(RepairType repairType, Set getRepairHostPriority(RepairType repairType) { + public Set 
getRepairHostPriority(RepairType repairType) + { return AutoRepairUtils.getPriorityHosts(repairType); } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index a635b8f7b7b6..f1302752b653 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -5671,7 +5671,8 @@ public void alterTopology(String changes) }); } - public List getTablesForKeyspace(String keyspace) { + public List getTablesForKeyspace(String keyspace) + { return Keyspace.open(keyspace).getColumnFamilyStores().stream().map(cfs -> cfs.name).collect(Collectors.toList()); } @@ -5679,7 +5680,8 @@ public List mutateSSTableRepairedState(boolean repaired, boolean preview { Map tables = Keyspace.open(keyspace).getColumnFamilyStores() .stream().collect(Collectors.toMap(c -> c.name, c -> c)); - for (String tableName : tableNames) { + for (String tableName : tableNames) + { if (!tables.containsKey(tableName)) throw new InvalidRequestException("Table " + tableName + " does not exist in keyspace " + keyspace); } @@ -5690,7 +5692,8 @@ public List mutateSSTableRepairedState(boolean repaired, boolean preview // mutate SSTables long repairedAt = !repaired ? 
0 : currentTimeMillis(); List sstablesTouched = new ArrayList<>(); - for (String tableName : tableNames) { + for (String tableName : tableNames) + { ColumnFamilyStore table = tables.get(tableName); Set result = table.runWithCompactionsDisabled(() -> { Set sstables = table.getLiveSSTables().stream().filter(predicate).collect(Collectors.toSet()); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 8f606900022a..d11cf02aa76b 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2564,7 +2564,8 @@ public void abortBootstrap(String nodeId, String endpoint) ssProxy.abortBootstrap(nodeId, endpoint); } - public AutoRepairConfig getAutoRepairConfig() { + public AutoRepairConfig getAutoRepairConfig() + { return autoRepairProxy.getAutoRepairConfig(); } @@ -2647,11 +2648,13 @@ public void setAutoRepairIgnoreDCs(AutoRepairConfig.RepairType repairType, Set mutateSSTableRepairedState(boolean repair, boolean preview, return ssProxy.mutateSSTableRepairedState(repair, preview, keyspace, tables); } - public List getTablesForKeyspace(String keyspace) { + public List getTablesForKeyspace(String keyspace) + { return ssProxy.getTablesForKeyspace(keyspace); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index a45583b9bf87..15d62c536a06 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -29,7 +29,8 @@ public class AutoRepairStateFactoryTest { @Test - public void testGetRepairState() { + public void testGetRepairState() + { AutoRepairState state = RepairType.getAutoRepairState(RepairType.FULL); assertTrue(state instanceof FullRepairState); @@ -44,12 +45,16 @@ public void testGetRepairState() 
{ } @Test - public void testGetRepairStateSupportsAllRepairTypes() { - for (RepairType repairType : RepairType.values()) { - try { + public void testGetRepairStateSupportsAllRepairTypes() + { + for (RepairType repairType : RepairType.values()) + { + try + { AutoRepairState state = RepairType.getAutoRepairState(repairType); assertNotNull(state); - } catch (IllegalArgumentException e) { + } catch (IllegalArgumentException e) + { assertNull(e); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index f0974dd83c1a..63e1ee4aaa0d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -67,14 +67,16 @@ public static Collection repairTypes() } @Before - public void setUp() { + public void setUp() + { AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); initMocks(this); createTable(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int)", KEYSPACE, testTable)); } @Test - public void testGetRepairRunnable() { + public void testGetRepairRunnable() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); AutoRepairService.setup(); @@ -138,7 +140,8 @@ public void testWaitForRepairToComplete() throws Exception } @Test - public void testGetLastRepairTime() { + public void testGetLastRepairTime() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.lastRepairTimeInMs = 1; @@ -146,7 +149,8 @@ public void testGetLastRepairTime() { } @Test - public void testSetTotalTablesConsideredForRepair() { + public void testSetTotalTablesConsideredForRepair() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setTotalTablesConsideredForRepair(1); @@ -155,7 +159,8 @@ public void testSetTotalTablesConsideredForRepair() { } @Test - public void 
testGetTotalTablesConsideredForRepair() { + public void testGetTotalTablesConsideredForRepair() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.totalTablesConsideredForRepair = 1; @@ -163,7 +168,8 @@ public void testGetTotalTablesConsideredForRepair() { } @Test - public void testSetLastRepairTimeInMs() { + public void testSetLastRepairTimeInMs() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setLastRepairTime(1); @@ -172,7 +178,8 @@ public void testSetLastRepairTimeInMs() { } @Test - public void testGetClusterRepairTimeInSec() { + public void testGetClusterRepairTimeInSec() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.clusterRepairTimeInSec = 1; @@ -180,7 +187,8 @@ public void testGetClusterRepairTimeInSec() { } @Test - public void testGetNodeRepairTimeInSec() { + public void testGetNodeRepairTimeInSec() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.nodeRepairTimeInSec = 1; @@ -188,7 +196,8 @@ public void testGetNodeRepairTimeInSec() { } @Test - public void testSetRepairInProgress() { + public void testSetRepairInProgress() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setRepairInProgress(true); @@ -197,7 +206,8 @@ public void testSetRepairInProgress() { } @Test - public void testIsRepairInProgress() { + public void testIsRepairInProgress() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.repairInProgress = true; @@ -205,7 +215,8 @@ public void testIsRepairInProgress() { } @Test - public void testSetSkippedTokenRangesCount() { + public void testSetSkippedTokenRangesCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setSkippedTokenRangesCount(1); @@ -214,7 +225,8 @@ public void testSetSkippedTokenRangesCount() { } @Test - public void testGetSkippedTokenRangesCount() { + public void testGetSkippedTokenRangesCount() + { AutoRepairState state = 
RepairType.getAutoRepairState(repairType); state.skippedTokenRangesCount = 1; @@ -222,7 +234,8 @@ public void testGetSkippedTokenRangesCount() { } @Test - public void testGetLongestUnrepairedSecNull() { + public void testGetLongestUnrepairedSecNull() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.longestUnrepairedNode = null; @@ -235,7 +248,8 @@ public void testGetLongestUnrepairedSecNull() { } @Test - public void testGetLongestUnrepairedSec() { + public void testGetLongestUnrepairedSec() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.longestUnrepairedNode = new AutoRepairHistory(UUID.randomUUID(), "", 0, 1000, null, 0, false); @@ -250,7 +264,8 @@ public void testGetLongestUnrepairedSec() { } @Test - public void testSetTotalMVTablesConsideredForRepair() { + public void testSetTotalMVTablesConsideredForRepair() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setTotalMVTablesConsideredForRepair(1); @@ -259,7 +274,8 @@ public void testSetTotalMVTablesConsideredForRepair() { } @Test - public void testGetTotalMVTablesConsideredForRepair() { + public void testGetTotalMVTablesConsideredForRepair() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.totalMVTablesConsideredForRepair = 1; @@ -267,7 +283,8 @@ public void testGetTotalMVTablesConsideredForRepair() { } @Test - public void testSetNodeRepairTimeInSec() { + public void testSetNodeRepairTimeInSec() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setNodeRepairTimeInSec(1); @@ -276,7 +293,8 @@ public void testSetNodeRepairTimeInSec() { } @Test - public void testSetClusterRepairTimeInSec() { + public void testSetClusterRepairTimeInSec() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setClusterRepairTimeInSec(1); @@ -285,15 +303,18 @@ public void testSetClusterRepairTimeInSec() { } @Test - public void testSetRepairKeyspaceCount() { + public void 
testSetRepairKeyspaceCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setRepairKeyspaceCount(1); assertEquals(1, state.repairKeyspaceCount); } + @Test - public void testGetRepairKeyspaceCount() { + public void testGetRepairKeyspaceCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.repairKeyspaceCount = 1; @@ -301,7 +322,8 @@ public void testGetRepairKeyspaceCount() { } @Test - public void testSetLongestUnrepairedNode() { + public void testSetLongestUnrepairedNode() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); @@ -311,7 +333,8 @@ public void testSetLongestUnrepairedNode() { } @Test - public void testSetSucceededTokenRangesCount() { + public void testSetSucceededTokenRangesCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setSucceededTokenRangesCount(1); @@ -320,7 +343,8 @@ public void testSetSucceededTokenRangesCount() { } @Test - public void testGetSucceededTokenRangesCount() { + public void testGetSucceededTokenRangesCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.succeededTokenRangesCount = 1; @@ -328,7 +352,8 @@ public void testGetSucceededTokenRangesCount() { } @Test - public void testSetFailedTokenRangesCount() { + public void testSetFailedTokenRangesCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.setFailedTokenRangesCount(1); @@ -337,7 +362,8 @@ public void testSetFailedTokenRangesCount() { } @Test - public void testGetFailedTokenRangesCount() { + public void testGetFailedTokenRangesCount() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.failedTokenRangesCount = 1; @@ -345,7 +371,8 @@ public void testGetFailedTokenRangesCount() { } @Test - public void isSuccess() { + public void isSuccess() + { AutoRepairState state = 
RepairType.getAutoRepairState(repairType); state.success = true; From 2a9aae00583d6c0df8da19b00a6e4ca013ddbfe4 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Fri, 3 Jan 2025 11:04:35 -0600 Subject: [PATCH 117/257] Process feedback Also remove redundant AutoRepairConfig.RepairType for RepairType in AutoRepairConfig. --- .../repair/autorepair/AutoRepairConfig.java | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 2e1d5f0a7276..9be32a47318e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -67,7 +67,7 @@ public class AutoRepairConfig implements Serializable public static final Class DEFAULT_SPLITTER = RepairTokenRangeSplitter.class; // make transient so gets consturcted in the implementation. - private final transient Map tokenRangeSplitters = new EnumMap<>(AutoRepairConfig.RepairType.class); + private final transient Map tokenRangeSplitters = new EnumMap<>(RepairType.class); public enum RepairType implements Serializable { @@ -82,11 +82,6 @@ public enum RepairType implements Serializable this.configName = LocalizeString.toLowerCaseLocalized(name()); } - RepairType(String configName) - { - this.configName = configName; - } - /** * @return Format of the repair type as it should be represented in configuration. * Canonically this is the enum name in lowerCase. @@ -311,7 +306,8 @@ public ParameterizedClass getTokenRangeSplitter(RepairType repairType) /** * Set a new token range splitter, this is not meant to be used other than for testing. 
*/ - public void setTokenRangeSplitter(RepairType repairType, ParameterizedClass tokenRangeSplitter) + @VisibleForTesting + void setTokenRangeSplitter(RepairType repairType, ParameterizedClass tokenRangeSplitter) { getOptions(repairType).token_range_splitter = tokenRangeSplitter; tokenRangeSplitters.remove(repairType); @@ -319,13 +315,8 @@ public void setTokenRangeSplitter(RepairType repairType, ParameterizedClass toke public IAutoRepairTokenRangeSplitter getTokenRangeSplitterInstance(RepairType repairType) { - IAutoRepairTokenRangeSplitter splitter = tokenRangeSplitters.get(repairType); - if (splitter == null) - { - splitter = newAutoRepairTokenRangeSplitter(repairType, getTokenRangeSplitter(repairType)); - tokenRangeSplitters.put(repairType, splitter); - } - return splitter; + return tokenRangeSplitters.computeIfAbsent(repairType, + key -> newAutoRepairTokenRangeSplitter(key, getTokenRangeSplitter(key))); } public void setInitialSchedulerDelay(RepairType repairType, String initialSchedulerDelay) @@ -349,11 +340,11 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT } @VisibleForTesting - static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException + static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException { try { - Class tokenRangeSplitterClass; + Class tokenRangeSplitterClass; final String className; if (parameterizedClass.class_name != null && !parameterizedClass.class_name.isEmpty()) { @@ -371,12 +362,12 @@ static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(AutoRepairC { Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); // first attempt to initialize with RepairType and Map arguments. 
- return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor(AutoRepairConfig.RepairType.class, Map.class).newInstance(repairType, parameters); + return tokenRangeSplitterClass.getConstructor(RepairType.class, Map.class).newInstance(repairType, parameters); } catch (NoSuchMethodException nsme) { // fall back on no argument constructor. - return (IAutoRepairTokenRangeSplitter) tokenRangeSplitterClass.getConstructor().newInstance(); + return tokenRangeSplitterClass.getConstructor().newInstance(); } } catch (Exception ex) From c30f5cf1858411cb3e82c1e49b5cf0e0aa41fd6d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sat, 4 Jan 2025 16:02:49 -0800 Subject: [PATCH 118/257] Fix the the total expected sub-ranges calculation keeping v-nodes in mind --- .../autorepair/RepairTokenRangeSplitter.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index b71a6712c311..19ed06bef70d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -538,6 +538,18 @@ protected List getRepairAssignments(List si // since its possible for us to hit maxBytesPerSchedule before seeing all ranges, shuffle so there is chance // at least of hitting all the ranges _eventually_ for the worst case scenarios Collections.shuffle(sizeEstimates); + int totalExpectedSubRanges = 0; + for (SizeEstimate estimate : sizeEstimates) + { + if (estimate.sizeForRepair != 0) + { + boolean needsSplitting = estimate.sizeForRepair > bytesPerAssignment.toBytes() || estimate.partitions > partitionsPerAssignment; + if (needsSplitting) + { + totalExpectedSubRanges += calculateNumberOfSplits(estimate); + } + } + } for (SizeEstimate estimate : sizeEstimates) { if (estimate.sizeForRepair == 0) @@ 
-567,7 +579,7 @@ protected List getRepairAssignments(List si for (Range subrange : subranges) { SizedRepairAssignment assignment = new SizedRepairAssignment(subrange, estimate.keyspace, Collections.singletonList(estimate.table), - String.format("subrange %d of %d", repairAssignments.size()+1, numberOfSplits), + String.format("subrange %d of %d", repairAssignments.size()+1, totalExpectedSubRanges), approximateBytesPerSplit); repairAssignments.add(assignment); } From b3462b26a51b35979eaa9d8ec023916aeb02b20f Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:35:24 -0600 Subject: [PATCH 119/257] Feedback and update tests * Promote node not being present in gossip to warn * Clean up NUMBER_OF_SUBRANGES doc * Simplify default map parameter parsing in both splitters * Doc cleanup and make RepairAssignmentIterator fully public * Remove unnecessary getters and setters in Splitters * Handle case where ColumnFamilyStore not retrievable, in this case return no assignments as we can assume deleted. 
* Always return a RepairAssignment for a table, even if empty, in RepairTokenRangeSplitter as node may have missed writes * Add set|getParameters tests * Update AutoRepairParameterizedTest to use RepairTokenRangeSplitter * Move no-split specific test to FixedSplitTokenRangeSplitterTest --- .../repair/autorepair/AutoRepairUtils.java | 2 +- .../FixedSplitTokenRangeSplitter.java | 62 +++--- .../autorepair/RepairAssignmentIterator.java | 15 +- .../autorepair/RepairTokenRangeSplitter.java | 190 +++++++----------- .../service/AutoRepairServiceMBean.java | 2 +- .../AutoRepairParameterizedTest.java | 58 +----- .../FixedSplitTokenRangeSplitterTest.java | 35 +++- .../RepairTokenRangeSplitterTest.java | 46 ++++- 8 files changed, 188 insertions(+), 222 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index e88c4fb2aa99..29204670028b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -434,7 +434,7 @@ protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Co } else { - logger.info("Node is not present in Gossip cache node {}, node data center {}", node, nodeDC); + logger.warn("Node is not present in Gossip cache node {}, node data center {}", node, nodeDC); } } return hostIdsInCurrentRing; diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index ede4c0c32469..07f186b2e748 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -22,10 +22,12 @@ import java.util.Collection; import java.util.Collections; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; 
import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.dht.Range; @@ -36,13 +38,22 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { + private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); + /** - * The number of subranges to split each to-be-repaired token range into, - * the higher this number, the smaller the repair sessions will be - * How many subranges to divide one range into? The default is 1. - * If you are using v-node, say 256, then the repair will always go one v-node range at a time, this parameter, additionally, will let us further subdivide a given v-node range into sub-ranges. - * With the value “1” and v-nodes of 256, a given table on a node will undergo the repair 256 times. But with a value “2,” the same table on a node will undergo a repair 512 times because every v-node range will be further divided by two. - * If you do not use v-nodes or the number of v-nodes is pretty small, say 8, setting this value to a higher number, say 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. + * The number of subranges to split each to-be-repaired token range into. Defaults to 1. + *

    + * The higher this number, the smaller the repair sessions will be. + *

    + * If you are using vnodes, say 256, then the repair will always go one vnode range at a time. This parameter, + * additionally, will let us further subdivide a given vnode range into subranges. + *

    + * With the value "1" and vnodes of 256, a given table on a node will undergo the repair 256 times. But with a + * value "2", the same table on a node will undergo a repair 512 times because every vnode range will be further + * divided by two. + *

    + * If you do not use vnodes or the number of vnodes is pretty small, say 8, setting this value to a higher number, + * such as 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. */ static final String NUMBER_OF_SUBRANGES = "number_of_subranges"; @@ -53,14 +64,7 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< { this.repairType = repairType; - if (parameters.containsKey(NUMBER_OF_SUBRANGES)) - { - numberOfSubranges = Integer.parseInt(parameters.get(NUMBER_OF_SUBRANGES)); - } - else - { - numberOfSubranges = 1; - } + numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, "1")); } @Override @@ -69,7 +73,7 @@ public Iterator getRepairAssignments(boolean primaryR return new RepairAssignmentIterator(repairPlans) { @Override - KeyspaceRepairAssignments nextInternal(int priority, KeyspaceRepairPlan repairPlan) + protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) { return getRepairAssignmentsForKeyspace(primaryRangeOnly, priority, repairPlan); } @@ -86,7 +90,7 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primar Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); if (!primaryRangeOnly) { - // if we need to repair non-primary token ranges, then change the tokens accrodingly + // if we need to repair non-primary token ranges, then change the tokens accordingly tokens = StorageService.instance.getLocalReplicas(keyspaceName).onlyFull().ranges(); } @@ -124,30 +128,18 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primar @Override public void setParameter(String key, String value) { - if (key.equals(NUMBER_OF_SUBRANGES)) + if (!key.equals(NUMBER_OF_SUBRANGES)) { - setNumberOfSubranges(Integer.parseInt(value)); + throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be " + NUMBER_OF_SUBRANGES); } - throw new 
IllegalArgumentException("Unexpected parameter '" + key + "', must be " + NUMBER_OF_SUBRANGES); + + logger.info("Setting {} to {} for repair type {}", key, value, repairType); + this.numberOfSubranges = Integer.parseInt(value); } @Override public Map getParameters() { - return Collections.unmodifiableMap(new LinkedHashMap() - {{ - put(NUMBER_OF_SUBRANGES, Integer.toString(getNumberOfSubranges())); - }}); + return Collections.singletonMap(NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubranges)); } - - public void setNumberOfSubranges(int numberOfSubranges) - { - this.numberOfSubranges = numberOfSubranges; - } - - public int getNumberOfSubranges() - { - return this.numberOfSubranges; - } - } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java index 5e6f8b61c3b8..fc5573a48944 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java @@ -23,9 +23,9 @@ import java.util.NoSuchElementException; /** - * Convenience {@link Iterator} implementation for to assist implementations of + * Convenience {@link Iterator} implementation to assist implementations of * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(boolean, List)} by passing {@link KeyspaceRepairPlan} - * to a custom {@link #nextInternal(int, KeyspaceRepairPlan)} in priority order. + * to a custom {@link #next(int, KeyspaceRepairPlan)} method in priority order. 
*/ public abstract class RepairAssignmentIterator implements Iterator { @@ -34,7 +34,7 @@ public abstract class RepairAssignmentIterator implements Iterator currentIterator = null; private PrioritizedRepairPlan currentPlan = null; - RepairAssignmentIterator(List repairPlans) + public RepairAssignmentIterator(List repairPlans) { this.repairPlanIterator = repairPlans.iterator(); } @@ -58,19 +58,20 @@ private synchronized Iterator currentIterator() @Override public boolean hasNext() { - return currentIterator().hasNext(); + Iterator iterator = currentIterator(); + return (iterator != null && iterator.hasNext()); } @Override public KeyspaceRepairAssignments next() { - if (!currentIterator.hasNext()) + if (!hasNext()) { throw new NoSuchElementException("No remaining repair plans"); } final KeyspaceRepairPlan repairPlan = currentIterator().next(); - return nextInternal(currentPlan.getPriority(), repairPlan); + return next(currentPlan.getPriority(), repairPlan); } /** @@ -80,5 +81,5 @@ public KeyspaceRepairAssignments next() * @return assignments for the given keyspace at this priority. Should never return null, if one desires to * short-circuit the iterator, override {@link #hasNext()}. 
*/ - abstract KeyspaceRepairAssignments nextInternal(int priority, KeyspaceRepairPlan repairPlan); + protected abstract KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 19ed06bef70d..8851c99ddb8f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.EnumMap; @@ -30,7 +31,9 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; @@ -136,7 +139,7 @@ * * *

  • - * incremental: Configured in a way that attempts to repair 50GiB of data per repair, and 500GiB per + * incremental: Configured in a way that attempts to repair 50GiB of data per repair, and 100GiB per * schedule. This attempts to throttle the amount of IR and anticompaction done per schedule after turning * incremental on for the first time. You may want to consider increasing max_bytes_per_schedule * more than this much data is written per min_repair_interval. @@ -144,7 +147,7 @@ *
  • bytes_per_assignment: 50GiB
  • *
  • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
  • *
  • max_tables_per_assignment: 64
  • - *
  • max_bytes_per_schedule: 500GiB
  • + *
  • max_bytes_per_schedule: 100GiB
  • * * *
  • @@ -171,7 +174,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; - static final String[] PARAMETERS = { BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE }; + static final List PARAMETERS = Arrays.asList(BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE); private final AutoRepairConfig.RepairType repairType; @@ -194,9 +197,9 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter *
      max_bytes_per_schedule: 1000GiB
    *
  • * It's expected that these defaults should work well for everything except incremental, so we confine - * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 500GiB. This should strike a good balance + * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 100GiB. This should strike a good balance * between the amount of data that will be repaired during an initial migration to incremental repair and should - * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 500GiB of + * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of * data is written to a node per min_repair_interval. */ private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap(AutoRepairConfig.RepairType.class) {{ @@ -205,65 +208,44 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. 
put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) .withBytesPerAssignment(new DataStorageSpec.LongBytesBound("50GiB")) - .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("500GiB")) + .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("100GiB")) .build()); put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.PREVIEW_REPAIRED) .build()); }}; - public RepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map parameters) { this.repairType = repairType; this.givenParameters.putAll(parameters); - RepairTypeDefaults defaults = DEFAULTS_BY_REPAIR_TYPE.get(repairType); - if (parameters.containsKey(BYTES_PER_ASSIGNMENT)) - { - bytesPerAssignment = new DataStorageSpec.LongBytesBound(parameters.get(BYTES_PER_ASSIGNMENT)); - } - else - { - bytesPerAssignment = defaults.bytesPerAssignment; - } - - if (parameters.containsKey(PARTITIONS_PER_ASSIGNMENT)) - { - partitionsPerAssignment = Long.parseLong(parameters.get(PARTITIONS_PER_ASSIGNMENT)); - } - else - { - partitionsPerAssignment = defaults.partitionsPerAssignment; - } - - if (parameters.containsKey(MAX_TABLES_PER_ASSIGNMENT)) - { - maxTablesPerAssignment = Integer.parseInt(parameters.get(MAX_TABLES_PER_ASSIGNMENT)); - } - else{ - maxTablesPerAssignment = defaults.maxTablesPerAssignment; - } + reinitParameters(); + } - if (parameters.containsKey(MAX_BYTES_PER_SCHEDULE)) - { - maxBytesPerSchedule = new DataStorageSpec.LongBytesBound(parameters.get(MAX_BYTES_PER_SCHEDULE)); - } - else - { - maxBytesPerSchedule = defaults.maxBytesPerSchedule; - } + private void reinitParameters() + { + RepairTypeDefaults defaults = DEFAULTS_BY_REPAIR_TYPE.get(repairType); + DataStorageSpec.LongBytesBound bytesPerAssignmentTmp = getPropertyOrDefault(BYTES_PER_ASSIGNMENT, DataStorageSpec.LongBytesBound::new, defaults.bytesPerAssignment); + DataStorageSpec.LongBytesBound maxBytesPerScheduleTmp = 
getPropertyOrDefault(MAX_BYTES_PER_SCHEDULE, DataStorageSpec.LongBytesBound::new, defaults.maxBytesPerSchedule); - if (bytesPerAssignment.toBytes() > maxBytesPerSchedule.toBytes()) + // Validate that bytesPerAssignment <= maxBytesPerSchedule + if (bytesPerAssignmentTmp.toBytes() > maxBytesPerScheduleTmp.toBytes()) { throw new IllegalArgumentException(String.format("%s='%s' cannot be greater than %s='%s' for %s", BYTES_PER_ASSIGNMENT, - bytesPerAssignment.toBytes(), + bytesPerAssignmentTmp, MAX_BYTES_PER_SCHEDULE, - maxBytesPerSchedule, + maxBytesPerScheduleTmp, repairType.getConfigName())); } + bytesPerAssignment = bytesPerAssignmentTmp; + maxBytesPerSchedule = maxBytesPerScheduleTmp; + + partitionsPerAssignment = getPropertyOrDefault(PARTITIONS_PER_ASSIGNMENT, Long::parseLong, defaults.partitionsPerAssignment); + maxTablesPerAssignment = getPropertyOrDefault(MAX_TABLES_PER_ASSIGNMENT, Integer::parseInt, defaults.maxTablesPerAssignment); + logger.info("Configured {}[{}] with {}={}, {}={}, {}={}, {}={}", RepairTokenRangeSplitter.class.getName(), repairType.getConfigName(), BYTES_PER_ASSIGNMENT, bytesPerAssignment, @@ -272,12 +254,11 @@ public RepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map T getPropertyOrDefault(String propertyName, Function mapper, T defaultValue) { - return repairType; + return Optional.ofNullable(this.givenParameters.get(propertyName)).map(mapper).orElse(defaultValue); } - @Override public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) { @@ -300,7 +281,7 @@ private class BytesBasedRepairAssignmentIterator extends RepairAssignmentIterato } @Override - KeyspaceRepairAssignments nextInternal(int priority, KeyspaceRepairPlan repairPlan) + protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) { // short circuit if we've accumulated too many bytes by returning a KeyspaceRepairAssignments with // no assignments. 
We do this rather than returning false in hasNext() because we want to signal @@ -334,8 +315,18 @@ List getRepairAssignmentsForKeyspace(AutoRepairConfig.Rep tableNames.sort((t1, t2) -> { ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); - if (cfs1 == null || cfs2 == null) - throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, t1)); + // If for whatever reason the CFS is not retrievable, we can assume its been deleted, so give the + // other cfs precedence. + if (cfs1 == null) + { + // cfs1 is lesser than because its null + return -1; + } + else if (cfs2 == null) + { + // cfs1 is greather than because cfs2 is null + return 1; + } return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); }); } @@ -530,6 +521,11 @@ protected List getRepairAssignmentsForTable(String keyspa return getRepairAssignments(sizeEstimates); } + private static void logSkippingTable(String keyspaceName, String tableName) + { + logger.warn("Could not resolve table data for {}.{} assuming it has since been deleted, skipping", keyspaceName, tableName); + } + @VisibleForTesting protected List getRepairAssignments(List sizeEstimates) { @@ -555,16 +551,25 @@ protected List getRepairAssignments(List si if (estimate.sizeForRepair == 0) { ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(estimate.keyspace, estimate.table); + + if (cfs == null) + { + logSkippingTable(estimate.keyspace, estimate.table); + continue; + } + long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); if (memtableSize > 0L) { logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); - SizedRepairAssignment assignment = new 
SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "memtable only", memtableSize); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "full primary rangee for table with memtable only detected", memtableSize); repairAssignments.add(assignment); } else { - logger.debug("Skipping {}.{} for range {} because it had no unrepaired SSTables and no memtable data", estimate.keyspace, estimate.table, estimate.tokenRange); + logger.debug("Included {}.{} range {}, has no SSTables or memtable data, but adding single repair assignment for entire range in case writes were missed", estimate.keyspace, estimate.table, estimate.tokenRange); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "full primary range for table with no data detected", 0L); + repairAssignments.add(assignment); } } else @@ -724,14 +729,13 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Range tokenRange) { final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); - if (cfs == null) { - throw new IllegalArgumentException(String.format("Could not resolve ColumnFamilyStore from %s.%s", keyspaceName, tableName)); + logSkippingTable(keyspaceName, tableName); + return Refs.ref(Collections.emptyList()); } Refs refs = null; - while (refs == null) { Iterable sstables = cfs.getTracker().getView().select(SSTableSet.CANONICAL); @@ -750,28 +754,14 @@ static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repa @Override public void setParameter(String key, String value) { - switch (key) + if (!PARAMETERS.contains(key)) { - case BYTES_PER_ASSIGNMENT: - setBytesPerAssignment(new 
DataStorageSpec.LongBytesBound(value)); - givenParameters.put(key, value); - return; - case PARTITIONS_PER_ASSIGNMENT: - setPartitionsPerAssignment(Long.parseLong(value)); - givenParameters.put(key, value); - return; - case MAX_TABLES_PER_ASSIGNMENT: - setMaxTablesPerAssignment(Integer.parseInt(value)); - givenParameters.put(key, value); - return; - case MAX_BYTES_PER_SCHEDULE: - setMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound(value)); - givenParameters.put(key, value); - return; - default: - throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be one of " + - PARAMETERS); + throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be one of " + PARAMETERS); } + + logger.info("Setting {} to {} for repair type {}", key, value, repairType); + givenParameters.put(key, value); + reinitParameters(); } @Override @@ -780,7 +770,7 @@ public Map getParameters() final Map parameters = new LinkedHashMap<>(); for (String parameter : PARAMETERS) { - // Use the parameter as configured if it exists, user would likely to prefer to see something more readable. + // Use the parameter as provided if present. 
if (givenParameters.containsKey(parameter)) { parameters.put(parameter, givenParameters.get(parameter)); @@ -790,16 +780,16 @@ public Map getParameters() switch (parameter) { case BYTES_PER_ASSIGNMENT: - parameters.put(parameter, getBytesPerAssignment().toString()); + parameters.put(parameter, bytesPerAssignment.toString()); continue; case PARTITIONS_PER_ASSIGNMENT: - parameters.put(parameter, Long.toString(getPartitionsPerAssignment())); + parameters.put(parameter, Long.toString(partitionsPerAssignment)); continue; case MAX_TABLES_PER_ASSIGNMENT: - parameters.put(parameter, Integer.toString(getMaxTablesPerAssignment())); + parameters.put(parameter, Integer.toString(maxTablesPerAssignment)); continue; case MAX_BYTES_PER_SCHEDULE: - parameters.put(parameter, getMaxBytesPerSchedule().toString()); + parameters.put(parameter, maxBytesPerSchedule.toString()); continue; default: // not expected @@ -809,46 +799,6 @@ public Map getParameters() return Collections.unmodifiableMap(parameters); } - public DataStorageSpec.LongBytesBound getBytesPerAssignment() - { - return bytesPerAssignment; - } - - public void setBytesPerAssignment(DataStorageSpec.LongBytesBound bytesPerAssignment) - { - this.bytesPerAssignment = bytesPerAssignment; - } - - public long getPartitionsPerAssignment() - { - return partitionsPerAssignment; - } - - public void setPartitionsPerAssignment(long partitionsPerAssignment) - { - this.partitionsPerAssignment = partitionsPerAssignment; - } - - public int getMaxTablesPerAssignment() - { - return maxTablesPerAssignment; - } - - public void setMaxTablesPerAssignment(int maxTablesPerAssignment) - { - this.maxTablesPerAssignment = maxTablesPerAssignment; - } - - public DataStorageSpec.LongBytesBound getMaxBytesPerSchedule() - { - return maxBytesPerSchedule; - } - - public void setMaxBytesPerSchedule(DataStorageSpec.LongBytesBound maxBytesPerSchedule) - { - this.maxBytesPerSchedule = maxBytesPerSchedule; - } - /** * Represents a size estimate by both bytes and 
partition count for a given keyspace and table for a token range. */ diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index e69a45482bc0..14fe9feb7958 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -73,5 +73,5 @@ public interface AutoRepairServiceMBean public Map getAutoRepairTokenRangeSplitterParameters(RepairType repairType); - public void setAutoRepairTokenRangeSplitterParameter(AutoRepairConfig.RepairType repairType, String key, String value); + public void setAutoRepairTokenRangeSplitterParameter(RepairType repairType, String key, String value); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 7ed8fa3d61dd..a6b60a9f4c1c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -18,14 +18,10 @@ package org.apache.cassandra.repair.autorepair; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; -import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -34,10 +30,7 @@ import com.google.common.collect.Sets; import org.apache.cassandra.config.DurationSpec; -import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.cql3.statements.schema.TableAttributes; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; import 
org.apache.cassandra.locator.LocalStrategy; @@ -76,7 +69,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; @@ -103,10 +95,9 @@ public class AutoRepairParameterizedTest extends CQLTester AutoRepairState autoRepairState; @Mock RepairCoordinator repairRunnable; - private static AutoRepairConfig defaultConfig; - // Expected number of tables that should be repaired. - private static int expectedTablesGoingThroughRepair; + // Expected number of repairs to be executed. + private static int expectedRepairAssignments; @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -131,7 +122,7 @@ public static void setupClass() throws Exception // Calculate the expected number of tables to be repaired, this should be all system keyspaces that are // distributed, plus 1 for the table we created (ks.tbl), excluding the 'mv' materialized view and // 'tbl_disabled_auto_repair' we created. - expectedTablesGoingThroughRepair = 0; + int expectedTablesGoingThroughRepair = 0; for (Keyspace keyspace : Keyspace.all()) { // skip LocalStrategy keyspaces as these aren't repaired. @@ -148,6 +139,8 @@ public static void setupClass() throws Exception int expectedTables = keyspace.getName().equals("ks") ? 1 : keyspace.getColumnFamilyStores().size(); expectedTablesGoingThroughRepair += expectedTables; } + // Since the splitter will unwrap a full token range, we expect twice as many repairs. 
+ expectedRepairAssignments = expectedTablesGoingThroughRepair * 2; } @Before @@ -210,7 +203,7 @@ private void resetCounters() private void resetConfig() { // prepare a fresh default config - defaultConfig = new AutoRepairConfig(true); + AutoRepairConfig defaultConfig = new AutoRepairConfig(true); for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { defaultConfig.setAutoRepairEnabled(repairType, true); @@ -223,10 +216,6 @@ private void resetConfig() config.global_settings = defaultConfig.global_settings; config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); - // Use fixed splitter so we get a deterministic amount of repairs. - config.setTokenRangeSplitter(repairType, - new ParameterizedClass(FixedSplitTokenRangeSplitter.class.getName(), - Map.of(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, "1"))); } private void executeCQL() @@ -571,31 +560,6 @@ public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); } - @Test - public void testTokenRangesNoSplit() - { - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(1, tokens.size()); - List> expectedToken = new ArrayList<>(tokens); - - List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE); - - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.emptyMap()).getRepairAssignments(true, plan); - - // should be only 1 entry for the keyspace. - assertTrue(keyspaceAssignments.hasNext()); - KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); - assertFalse(keyspaceAssignments.hasNext()); - - List assignments = keyspace.getRepairAssignments(); - assertNotNull(assignments); - - // should be 1 entry for the table which covers the full range. 
- assertEquals(1, assignments.size()); - assertEquals(expectedToken.get(0).left, assignments.get(0).getTokenRange().left); - assertEquals(expectedToken.get(0).right, assignments.get(0).getTokenRange().right); - } - @Test public void testTableAttribute() { @@ -679,10 +643,10 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); // Expect configured retries for each table expected to be repaired - assertEquals(config.getRepairMaxRetries()*expectedTablesGoingThroughRepair, sleepCalls.get()); + assertEquals(config.getRepairMaxRetries() * expectedRepairAssignments, sleepCalls.get()); verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(expectedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(expectedRepairAssignments); } @Test @@ -708,7 +672,7 @@ public void testRepairSuccessAfterRetry() AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } @@ -780,7 +744,7 @@ public void testSoakAfterImmediateRepair() AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } @@ -800,7 +764,7 @@ public void 
testNoSoakAfterRepair() AutoRepair.instance.repair(repairType); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedTablesGoingThroughRepair); + verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 79ff941360c1..360a270908e5 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -21,9 +21,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Set; import java.util.Map; @@ -59,10 +59,7 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; - private final Map splitterParams = new LinkedHashMap() - {{ - put(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(4)); - }}; + private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(4)); @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -190,4 +187,32 @@ public void testTokenRangesSplitByKeyspace() assertEquals(tables, assignments.get(i).getTableNames()); } } + + @Test + public void testTokenRangesNoSplitByDefault() + { + Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + int totalTokenRanges = 3; + assertEquals(totalTokenRanges, tokens.size()); + List> expectedToken 
= new ArrayList<>(tokens); + + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1); + + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.emptyMap()).getRepairAssignments(true, plan); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // should be 3 entries for the table which covers each token range. + assertEquals(totalTokenRanges, assignments.size()); + for (int i = 0; i < totalTokenRanges; i++) + { + assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); + } + } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 6741e060b219..3edb2dee490b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -112,9 +112,10 @@ public void testSizePartitionCountSplit() throws Throwable @Test public void testGetRepairAssignmentsForTable_NoSSTables() { + // Should return 1 assignment if there are no SSTables Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); - assertEquals(0, assignments.size()); + assertEquals(1, assignments.size()); } @Test @@ -276,11 +277,10 @@ public void testMergeWithDuplicateTables() @Test public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxBytesPerSchedule() { - // Ensures that getRepairAssignments splits by SUBRANGE_SIZE and 
filterRepairAssignments limits by MAX_BYTES_PER_SCHEDULE. - Map parameters = new HashMap<>(); - parameters.put(BYTES_PER_ASSIGNMENT, "50GiB"); - parameters.put(MAX_BYTES_PER_SCHEDULE, "100GiB"); - repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.INCREMENTAL, parameters); + // Ensures that getRepairAssignments splits by BYTES_PER_ASSIGNMENT and filterRepairAssignments limits by MAX_BYTES_PER_SCHEDULE. + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.INCREMENTAL, Collections.emptyMap()); + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "50GiB"); + repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "100GiB"); // Given a size estimate of 1024GiB, we should expect 21 splits (50GiB*21 = 1050GiB < 1024GiB) SizeEstimate sizeEstimate = sizeEstimateByBytes(new LongMebibytesBound("1024GiB")); @@ -303,6 +303,40 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte assertEquals(expectedBytes*2, filteredRepairAssignments.newBytesSoFar); } + @Test(expected=IllegalArgumentException.class) + public void testSetParameterShouldNotAllowUnknownParameter() + { + repairRangeSplitter.setParameter("unknown", "x"); + } + + @Test(expected=IllegalArgumentException.class) + public void testSetParameterShouldNotAllowSettingBytesPerAssignmentGreaterThanMaxBytesPerSchedule() + { + repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "500GiB"); + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "600GiB"); + } + + @Test(expected=IllegalArgumentException.class) + public void testSetParameterShouldNotAllowSettingMaxBytesPerScheduleLessThanBytesPerAssignment() + { + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "100MiB"); + repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "50MiB"); + } + + @Test + public void testGetParameters() + { + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "100MiB"); + repairRangeSplitter.setParameter(MAX_TABLES_PER_ASSIGNMENT, "5"); + + Map parameters = 
repairRangeSplitter.getParameters(); + // Each parameter should be present. + assertEquals(RepairTokenRangeSplitter.PARAMETERS.size(), parameters.size()); + // The parameters we explicitly set should be set exactly as we set them. + assertEquals("100MiB", parameters.get(BYTES_PER_ASSIGNMENT)); + assertEquals("5", parameters.get(MAX_TABLES_PER_ASSIGNMENT)); + } + private SizeEstimate sizeEstimateByBytes(LongMebibytesBound totalSize) { return sizeEstimateByBytes(totalSize, totalSize); From b3620f1110978e47639cd596a353a3bbcf002944 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 7 Jan 2025 10:48:45 -0800 Subject: [PATCH 120/257] Remove unused import in RepairTokenRangeSplitterTest.java --- .../repair/autorepair/RepairTokenRangeSplitterTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 3edb2dee490b..fd79fc4dd556 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; From 9b03111140d08ca4ff5e54f057860d5f8eda3b8a Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 7 Jan 2025 17:14:36 -0800 Subject: [PATCH 121/257] Add configuration to cassandra.yaml and cassandra_later.yaml --- conf/cassandra.yaml | 101 ++++++++++++++++++ conf/cassandra_latest.yaml | 101 ++++++++++++++++++ .../repair/autorepair/AutoRepairConfig.java | 98 ++++++++--------- 3 files changed, 248 insertions(+), 52 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index c255138a02df..3bd6f800321d 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2674,3 
+2674,104 @@ storage_compatibility_mode: NONE # # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s + +# Configuration for AutoRepair Scheduler +#auto_repair: +# # Enable/Disable auto repair scheduler. +# # If it is set to false, then the scheduler thread will not be spun up. +# # If it is set to true, then the repair scheduler thread will be created. The thread will +# # look for secondary config available for each repair type (full, incremental, and preview_repaired), +# # and based on that, it will schedule repairs. +# enabled: false +# # Time interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if +# # none is going, then check if it's time to schedule or wait. +# repair_check_interval: 5m +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time in seconds for retrying a repair session. +# repair_retry_backoff: 30s +# # Minimum duration for the execution of a single repair task (i.e.: RepairRunnable). +# # This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of +# # repair tasks in a short period of time. +# repair_task_min_duration: 5s +# # When any nodes leave the ring then the repair schedule needs to adjust the order, etc. +# # The AutoRepair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. +# # This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. +# # So, the duration in this config determines how long a deleted host's information is kept in the scheduler's metadata. +# history_clear_delete_hosts_buffer_interval: 2h +# # Configuration specific to each repair type. Options: full, incremental, and preview_repaired.
+# repair_type_overrides: +# full: +# # Enable/Disable full auto repair +# enabled: false +# # Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finish +# # repair quickly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours, then on a given node it won’t start a new repair round +# # until at least 24 hours have passed since the last repair conducted on that node. +# min_repair_interval: 24h +# # Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. +# repair_by_keyspace: false +# # Number of repair threads to run for a given invoked Repair Job. +# # Once the scheduler schedules one repair session, then how many threads to use inside that job will be controlled through this parameter. +# # This is similar to -j for repair options for the nodetool repair command. +# number_of_repair_threads: 1 +# # Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. +# # This is to avoid penalizing good tables (neighbors) with an outlier. +# sstable_upper_threshold: 10000 +# # Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. +# # Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. +# # Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. +# # Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. +# # So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality.
+# table_max_repair_time: 6h +# # This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. +# # If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. +# ignore_dcs: [] +# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' +# # It is the same as -pr in nodetool repair options. +# repair_primary_token_range_only: true +# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. The default is 3. +# # This configuration controls how many nodes would run repair in parallel. +# # The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. +# # If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. +# # It will ensure the maximum number of nodes running repair do not exceed “3”. +# parallel_repair_count: 3 +# # The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. +# # The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, +# # the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis, operators would have to adjust to meet the SLA. +# # The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. +# # So now, as the fleet grows or shrinks, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity.
+# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers +# parallel_repair_percentage: 3 +# # If the scheduler should repair MV table or not. +# mv_repair_enabled: false +# # After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. +# initial_scheduler_delay: 5m +# # The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. +# repair_session_timeout: 3h +# # Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. +# force_repair_new_node: false +# # Splitter implementation to use for generating repair assignments. +# # The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} +# # and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) +# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# # The target and maximum amount of bytes that should be included in a repair +# # assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this +# # involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being +# # repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction +# # that is expected. For all other repair types, this involves the amount of data covering the range being +# # repaired. +# token_range_splitter.bytes_per_assignment: 200GiB +# # The target number of partitions that should be included in a repair +# # assignment. This configuration exists to reduce excessive overstreaming. +# token_range_splitter.partitions_per_assignment: 1048576 +# # The maximum number of tables that can be included in a repair assignment. 
+# # This aims to reduce the number of repairs, especially in cases where a large number of tables exist for +# # a keyspace. Note that the splitter will avoid batching tables together if they exceed the other +# # configuration parameters such as bytes_per_assignment and partitions_per_assignment +# token_range_splitter.max_tables_per_assignment: 64 +# # The maximum number of bytes to cover an individual schedule. This serves +# # as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to +# # reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if +# # writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up +# # the min_repair_interval. +# token_range_splitter.max_bytes_per_schedule: 100000GiB diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 9c86beeea829..db045b7659ee 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2362,3 +2362,104 @@ default_secondary_index_enabled: true # compatibility mode would no longer toggle behaviors as when it was running in the UPGRADING mode. # storage_compatibility_mode: NONE + +# Configuration for AutoRepair Scheduler +#auto_repair: +# # Enable/Disable auto repair scheduler. +# # If it is set to false, then the scheduler thread will not be spun up. +# # If it is set to true, then the repair scheduler thread will be created. The thread will +# # look for secondary config available for each repair type (full, incremental, and preview_repaired), +# # and based on that, it will schedule repairs. +# enabled: false +# # Time interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if +# # none is going, then check if it's time to schedule or wait. +# repair_check_interval: 5m +# # Maximum number of retries for a repair session.
+# repair_max_retries: 3 +# # Backoff time in seconds for retrying a repair session. +# repair_retry_backoff: 30s +# # Minimum duration for the execution of a single repair task (i.e.: RepairRunnable). +# # This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of +# # repair tasks in a short period of time. +# repair_task_min_duration: 5s +# # When any nodes leave the ring then the repair schedule needs to adjust the order, etc. +# # The AutoRepair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. +# # This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. +# # So, the duration in this config determines how long a deleted host's information is kept in the scheduler's metadata. +# history_clear_delete_hosts_buffer_interval: 2h +# # Configuration specific to each repair type. Options: full, incremental, and preview_repaired. +# repair_type_overrides: +# full: +# # Enable/Disable full auto repair +# enabled: false +# # Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finish +# # repair quickly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours, then on a given node it won’t start a new repair round +# # until at least 24 hours have passed since the last repair conducted on that node. +# min_repair_interval: 24h +# # Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. +# repair_by_keyspace: false +# # Number of repair threads to run for a given invoked Repair Job. +# # Once the scheduler schedules one repair session, then how many threads to use inside that job will be controlled through this parameter.
+# # This is similar to -j for repair options for the nodetool repair command. +# number_of_repair_threads: 1 +# # Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. +# # This is to avoid penalizing good tables (neighbors) with an outlier. +# sstable_upper_threshold: 10000 +# # Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. +# # Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. +# # Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. +# # Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. +# # So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. +# table_max_repair_time: 6h +# # This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. +# # If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. +# ignore_dcs: [] +# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' +# # It is the same as -pr in nodetool repair options. +# repair_primary_token_range_only: true +# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. The default is 3. +# # This configuration controls how many nodes would run repair in parallel. 
+# # The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. +# # If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. +# # It will ensure the maximum number of nodes running repair do not exceed “3”. +# parallel_repair_count: 3 +# # The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. +# # The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, +# # the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. +# # The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. +# # So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. +# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers +# parallel_repair_percentage: 3 +# # If the scheduler should repair MV table or not. +# mv_repair_enabled: false +# # After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. +# initial_scheduler_delay: 5m +# # The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. +# repair_session_timeout: 3h +# # Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. 
+# force_repair_new_node: false +# # Splitter implementation to use for generating repair assignments. +# # The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} +# # and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) +# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# # The target and maximum amount of bytes that should be included in a repair +# # assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this +# # involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being +# # repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction +# # that is expected. For all other repair types, this involves the amount of data covering the range being +# # repaired. +# token_range_splitter.bytes_per_assignment: 200GiB +# # The target number of partitions that should be included in a repair +# # assignment. This configuration exists to reduce excessive overstreaming. +# token_range_splitter.partitions_per_assignment: 1048576 +# # The maximum number of tables that can be included in a repair assignment. +# # This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for +# # a keyspace. Note that the splitter will avoid batching tables together if they exceed the other +# # configuration paramters such as bytes_per_assignment and partitions_per_assignment +# token_range_splitter.max_tables_per_assignment: 64 +# # The maximum number of bytes to cover an individiual schedule. This serves +# # as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to +# # reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if +# # writes outpace the amount of data being repaired. 
Alternatively, one may want to choose tuning down or up +# # the min_repair_interval. +# token_range_splitter.max_bytes_per_schedule: 100000GiB diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 9be32a47318e..b46136cb40e6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -40,23 +40,24 @@ public class AutoRepairConfig implements Serializable { - // enable/disable auto repair globally, overrides all other settings. Cannot be modified dynamically. - // if it is set to false, then no repair will be scheduled, including full and incremental repairs by this framework. - // if it is set to true, then this repair scheduler will consult another config available for each RepairType, and based on that config, it will schedule repairs. + // Enable/Disable auto repair scheduler. If it is set to false, then the scheduler thread will not be spun up. + // If it is set to true, then the repair scheduler thread will be created. The thread will + // look for secondary config available for each repair type (full, incremental, and preview_repaired), + // and based on that, it will schedule repairs. public volatile Boolean enabled; - // the interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if - // none is going, then check if it's time to schedule or wait + // Time interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if + // none is going, then check if it's time to schedule or wait. public final DurationSpec.IntSecondsBound repair_check_interval = new DurationSpec.IntSecondsBound("5m"); - // when any nodes leave the ring then the repair schedule needs to adjust the order, etc. 
- // the repair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. + // When any nodes leave the ring then the repair schedule needs to adjust the order, etc. + // The AutoRepair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. // This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. // So, the duration in this config determinses for how long deleted host's information is kept in the scheduler's metadata. public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); - // the maximum number of retries for a repair session. + // Maximum number of retries for a repair session. public volatile Integer repair_max_retries = 3; - // the backoff time in seconds for retrying a repair session. + // Backoff time in seconds for retrying a repair session. public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); - // the minimum duration for the execution of a single repair task (i.e.: RepairRunnable). + // Minimum duration for the execution of a single repair task (i.e.: RepairRunnable). // This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of // repair tasks in a short period of time. 
public volatile DurationSpec.LongSecondsBound repair_task_min_duration = new DurationSpec.LongSecondsBound("5s"); @@ -100,7 +101,7 @@ public static AutoRepairState getAutoRepairState(RepairType repairType) case INCREMENTAL: return new IncrementalRepairState(); case PREVIEW_REPAIRED: - return new PreviewRepairedState(); + return new PreviewRepairedState(); } throw new IllegalArgumentException("Invalid repair type: " + repairType); @@ -412,55 +413,49 @@ protected static Options getDefaultOptions() return opts; } - // enable/disable auto repair for the given repair type + // Enable/Disable full auto repair public volatile Boolean enabled; - // auto repair is default repair table by table, if this is enabled, the framework will repair all the tables in a keyspace in one go. + // Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. public volatile Boolean repair_by_keyspace; - // the number of repair threads to run for a given invoked Repair Job. - // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. - // This is similar to -j for repair options for the nodetool repair command. + // Number of repair threads to run for a given invoked Repair Job. + // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. + // This is similar to -j for repair options for the nodetool repair command. public volatile Integer number_of_repair_threads; - // The number of nodes running repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. The default is 3. - // This configuration controls how many nodes would run repair in parallel. - // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. 
- // If one or more node(s) finish repair, then the framework automatically picks up the next candidate and ensures the maximum number of nodes running repair do not exceed “3”. + // The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. The default is 3. + // This configuration controls how many nodes would run repair in parallel. + // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. + // If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. + // It will ensure the maximum number of nodes running repair do not exceed “3”. public volatile Integer parallel_repair_count; - // the number of repair nodes that can run in parallel - // of the total number of nodes in the group [0,100] - // The percentage of nodes in the cluster that run repair parallelly. If parallelrepaircount is set, it will choose the larger value of the two. - // The problem with a fixed number of nodes (the above property) is that in a large-scale environment, - // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. - // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. - // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. - // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers + // The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. 
+ // The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, + // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. + // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. + // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. + // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers public volatile Integer parallel_repair_percentage; - // the upper threshold of SSTables allowed to participate in a single repair session - // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. - // This is to avoid penalizing good tables (neighbors) with an outlier. + // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. + // This is to avoid penalizing good tables (neighbors) with an outlier. public volatile Integer sstable_upper_threshold; - // the minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes - // repair quicly. - // The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round - // until the last repair conducted on a given node is < 24 hours. + // Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes + // repair quicly. 
The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round + // until the last repair conducted on a given node is < 24 hours. public volatile DurationSpec.IntSecondsBound min_repair_interval; - // specifies a denylist of datacenters to repair - // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. + // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. + // If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. public volatile Set ignore_dcs; - // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' - // It is the same as -pr in nodetool repair options. + // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' + // It is the same as -pr in nodetool repair options. public volatile Boolean repair_primary_token_range_only; - // configures whether to force immediate repair on new nodes - // default it is set to 'false'; this is useful if you want to repair new nodes immediately after they join the ring. + // Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. public volatile Boolean force_repair_new_node; - // the maximum time that a repair session can run for a single table - // Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. - // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. - // Out of these 10 tables, 1 table is humongous. 
Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. - // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. - // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. + // Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. + // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. + // Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. + // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. + // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. public volatile DurationSpec.IntSecondsBound table_max_repair_time; - // the default is 'true'. - // This flag determines whether the auto-repair framework needs to run anti-entropy, a.k.a, repair on the MV table or not. + // If the scheduler should repair MV table or not. public volatile Boolean mv_repair_enabled; /** * Splitter implementation to use for generating repair assignments. 
@@ -469,10 +464,9 @@ protected static Options getDefaultOptions() * and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) */ public volatile ParameterizedClass token_range_splitter; - // the minimum delay after a node starts before the scheduler starts running repair + // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; - // repair session timeout - this is applicable for each repair session - // the major issue with Repair is a session sometimes hangs; so this timeout is useful to unblock such problems + // The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. public volatile DurationSpec.IntSecondsBound repair_session_timeout; public String toString() From 7a3c36e703257321a6f7cab68c4c511e79959e67 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 7 Jan 2025 18:36:12 -0800 Subject: [PATCH 122/257] Add a doc for AutoRepair feature --- conf/cassandra.yaml | 12 +- conf/cassandra_latest.yaml | 12 +- .../cassandra/pages/auto-repair/overview.adoc | 125 ++++++++++++++++++ .../repair/autorepair/AutoRepairConfig.java | 66 ++++----- .../autorepair/RepairTokenRangeSplitter.java | 4 +- 5 files changed, 172 insertions(+), 47 deletions(-) create mode 100644 doc/modules/cassandra/pages/auto-repair/overview.adoc diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 3bd6f800321d..7b6470a7ca76 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2717,7 +2717,7 @@ storage_compatibility_mode: NONE # # Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. # # This is to avoid penalizing good tables (neighbors) with an outlier. 
# sstable_upper_threshold: 10000 -# # Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. +# # Max time for repairing one table on a given node, if exceeded, skip the table. # # Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. # # Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. # # Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. @@ -2726,10 +2726,10 @@ storage_compatibility_mode: NONE # # This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. # # If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. # ignore_dcs: [] -# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' +# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. # # It is the same as -pr in nodetool repair options. # repair_primary_token_range_only: true -# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. The default is 3. +# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. # # This configuration controls how many nodes would run repair in parallel. # # The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. 
# # If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. @@ -2740,7 +2740,7 @@ storage_compatibility_mode: NONE # # the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. # # The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. # # So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. -# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers +# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. # parallel_repair_percentage: 3 # # If the scheduler should repair MV table or not. # mv_repair_enabled: false @@ -2767,9 +2767,9 @@ storage_compatibility_mode: NONE # # The maximum number of tables that can be included in a repair assignment. # # This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for # # a keyspace. Note that the splitter will avoid batching tables together if they exceed the other -# # configuration paramters such as bytes_per_assignment and partitions_per_assignment +# # configuration parameters such as bytes_per_assignment and partitions_per_assignment # token_range_splitter.max_tables_per_assignment: 64 -# # The maximum number of bytes to cover an individiual schedule. This serves +# # The maximum number of bytes to cover an individual schedule. This serves # # as a mechanism for throttling the amount of work that can be done on each repair cycle. 
One may opt to # # reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if # # writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index db045b7659ee..dfd2bb504d0a 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2405,7 +2405,7 @@ storage_compatibility_mode: NONE # # Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. # # This is to avoid penalizing good tables (neighbors) with an outlier. # sstable_upper_threshold: 10000 -# # Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. +# # Max time for repairing one table on a given node, if exceeded, skip the table. # # Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. # # Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. # # Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. @@ -2414,10 +2414,10 @@ storage_compatibility_mode: NONE # # This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. # # If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. # ignore_dcs: [] -# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' +# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. 
# # It is the same as -pr in nodetool repair options. # repair_primary_token_range_only: true -# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. The default is 3. +# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. # # This configuration controls how many nodes would run repair in parallel. # # The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. # # If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. @@ -2428,7 +2428,7 @@ storage_compatibility_mode: NONE # # the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. # # The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. # # So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. -# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers +# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. # parallel_repair_percentage: 3 # # If the scheduler should repair MV table or not. # mv_repair_enabled: false @@ -2455,9 +2455,9 @@ storage_compatibility_mode: NONE # # The maximum number of tables that can be included in a repair assignment. # # This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for # # a keyspace. 
Note that the splitter will avoid batching tables together if they exceed the other -# # configuration paramters such as bytes_per_assignment and partitions_per_assignment +# # configuration parameters such as bytes_per_assignment and partitions_per_assignment # token_range_splitter.max_tables_per_assignment: 64 -# # The maximum number of bytes to cover an individiual schedule. This serves +# # The maximum number of bytes to cover an individual schedule. This serves # # as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to # # reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if # # writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc new file mode 100644 index 000000000000..4305e63a9810 --- /dev/null +++ b/doc/modules/cassandra/pages/auto-repair/overview.adoc @@ -0,0 +1,125 @@ += Overview of Auto Repair +:navtitle: Auto Repair overview +:description: Auto Repair concepts - How it works, how to configure it, and more. +:keywords: CEP-37 + +Repair orchestration inside Apache Cassandra. This fully Automated Repair scheduler inside Cassandra does not depend on the control plane, significantly reducing our operational overhead. +At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler inside Cassandra maintains a new replicated table under a distributed system_distributed keyspace. This table maintains the repair history for all the nodes, such as when it was repaired the last time, etc. The scheduler will pick the node(s) that run the repair first and continue orchestration to ensure Every table and all of their token ranges are repaired. 
The algorithm can also run repairs simultaneously on multiple nodes and splits the token range into subranges with the necessary retry to handle transient failures. The automatic repair runs as soon as we start a Cassandra cluster, like Compaction, and does not require human-in-the-loop. +The scheduler supports Full, Incremental, and Preview repair types today with the following features. A new repair type, such as Paxos repair and any future repair types, should be extended with minimal dev work! + +=== Features +- Capability to run on multiple nodes simultaneously +- Default implementation and an interface to override the data set being repaired per repair session +- Ability to extend the token split algorithms with the following two implementations readily available: +- Splits token ranges by putting a max cap on the size of data being repaired as part of one repair session as well as a max cap at a table level - this is crucial for Incremental Repair (Default) +- Split tokens evenly based on the number of splits specified +- A new CQL table property to do the following: +- Disable repair type at a table level if one wants the scheduler to skip one or more tables +- Setting repair priority for certain tables to prioritize those tables over others +- Enabling/Disabling scheduler for each repair type dynamically +- Per Data Center per job configuration +- Rich Configuration for each repair type, e.g., full, incremental, or preview_repaired +- Enough observability so an operator can configure alarms depending on their need + +=== Yaml Configuration +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Enable/Disable auto repair scheduler +| min_repair_interval | 24h | Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finish repair quickly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours.
On a given node it won’t start a new repair round until at least 24 hours have passed since the last repair conducted on that node. +| repair_by_keyspace | false | Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. +| number_of_repair_threads | 1 | Number of repair threads to run for a given invoked Repair Job. Once the scheduler schedules one repair session, the number of threads used inside that job is controlled through this parameter. This is similar to -j for repair options for the nodetool repair command. +| sstable_upper_threshold | 10000 | Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. This is to avoid penalizing good tables (neighbors) with an outlier. +| table_max_repair_time | 6h | Max time for repairing one table on a given node, if exceeded, skip the table. Let's say there is a Cassandra cluster with 10 tables belonging to 10 different customers. Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. +| ignore_dcs | [] | This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list.
+| repair_primary_token_range_only | true | Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. It is the same as -pr in nodetool repair options. +| parallel_repair_count | 3 | The number of nodes running repair in parallel. If parallel_repair_percentage is set, it will choose the larger value of the two. This configuration controls how many nodes would run repair in parallel. The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. It will ensure the maximum number of nodes running repair do not exceed “3”. +| parallel_repair_percentage | 3 | The percentage of nodes in the cluster that run repair in parallel. If parallel_repair_count is set, it will choose the larger value of the two. The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis, operators would have to adjust to meet the SLA. The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. So now, regardless of fleet size, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. This requires far fewer manual interventions, as it will rarely violate the repair SLA for customers. +| mv_repair_enabled | false | Whether the scheduler should repair materialized view (MV) tables. +| initial_scheduler_delay | 5m | After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart.
+| repair_session_timeout | 3h | The major issue with Repair is that a repair session sometimes hangs; this timeout is useful to resume such stuck repair sessions. +| force_repair_new_node | false | Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. +| token_range_splitter | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Splitter implementation to use for generating repair assignments. The default is `RepairTokenRangeSplitter`. The class should implement `IAutoRepairTokenRangeSplitter` and have a constructor accepting (`RepairType`, `java.util.Map`). +| token_range_splitter.bytes_per_assignment | 200GiB | The target and maximum amount of bytes that should be included in a repair assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction that is expected. For all other repair types, this involves the amount of data covering the range being repaired. +| token_range_splitter.partitions_per_assignment | 1048576 | The target number of partitions that should be included in a repair assignment. This configuration exists to reduce excessive overstreaming. +| token_range_splitter.max_tables_per_assignment | 64 | The maximum number of tables that can be included in a repair assignment. This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for a keyspace. Note that the splitter will avoid batching tables together if they exceed the other configuration parameters such as bytes_per_assignment and partitions_per_assignment. +| token_range_splitter.max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover an individual schedule.
This serves as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if writes outpace the amount of data being repaired. Alternatively, one may tune the min_repair_interval down or up. +|=== + + +=== Nodetool Configuration +==== nodetool getautorepairconfig +``` +$> nodetool getautorepairconfig +repair scheduler configuration: + repair_check_interval: 5m + repair_max_retries: 3 + repair_retry_backoff: 30s + repair_task_min_duration: 5s + history_clear_delete_hosts_buffer_interval: 2h +configuration for repair_type: full + enabled: true + min_repair_interval: 24h + repair_by_keyspace: false + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + mv_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 200GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100000GiB +configuration for repair_type: incremental + enabled: true + min_repair_interval: 24h + repair_by_keyspace: false + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + mv_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB +
token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100GiB +configuration for repair_type: preview_repaired + enabled: false +``` + +==== nodetool autorepairstatus +``` +$> nodetool autorepairstatus -t incremental +Active Repairs +425cea55-09aa-46e0-8911-9f37a4424574 + + +$> nodetool autorepairstatus -t full +Active Repairs +NONE + +``` + +==== nodetool setautorepairconfig +``` +$> nodetool setautorepairconfig -t incremental number_of_repair_threads 2 +``` + + + +==== More details +https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Automated+Repair+Solution[CEP-37] \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index b46136cb40e6..2d18851f58b2 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -415,47 +415,47 @@ protected static Options getDefaultOptions() // Enable/Disable full auto repair public volatile Boolean enabled; - // Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. + // Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. public volatile Boolean repair_by_keyspace; - // Number of repair threads to run for a given invoked Repair Job. - // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. - // This is similar to -j for repair options for the nodetool repair command. + // Number of repair threads to run for a given invoked Repair Job. 
+ // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. + // This is similar to -j for repair options for the nodetool repair command. public volatile Integer number_of_repair_threads; - // The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. The default is 3. - // This configuration controls how many nodes would run repair in parallel. - // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. - // If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. - // It will ensure the maximum number of nodes running repair do not exceed “3”. + // The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. + // This configuration controls how many nodes would run repair in parallel. + // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. + // If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. + // It will ensure the maximum number of nodes running repair do not exceed “3”. public volatile Integer parallel_repair_count; - // The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. - // The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, - // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. 
- // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. - // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. - // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers + // The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. + // The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, + // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. + // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. + // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. + // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. public volatile Integer parallel_repair_percentage; - // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. - // This is to avoid penalizing good tables (neighbors) with an outlier. + // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. + // This is to avoid penalizing good tables (neighbors) with an outlier. public volatile Integer sstable_upper_threshold; - // Minimum time in hours between repairing the same node again. 
This is useful for extremely tiny clusters, say 5 nodes, which finishes - // repair quicly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round - // until the last repair conducted on a given node is < 24 hours. + // Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes + // repair quicly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round + // until the last repair conducted on a given node is < 24 hours. public volatile DurationSpec.IntSecondsBound min_repair_interval; - // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. - // If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. + // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. + // If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. public volatile Set ignore_dcs; - // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false' - // It is the same as -pr in nodetool repair options. + // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. + // It is the same as -pr in nodetool repair options. public volatile Boolean repair_primary_token_range_only; - // Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. 
+ // Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. public volatile Boolean force_repair_new_node; - // Max time for repairing one table on a given node, if exceeded, skip the table. The default is 6 hours. - // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. - // Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. - // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. - // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. + // Max time for repairing one table on a given node, if exceeded, skip the table. + // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. + // Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. + // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. + // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. public volatile DurationSpec.IntSecondsBound table_max_repair_time; - // If the scheduler should repair MV table or not. + // If the scheduler should repair MV table or not. public volatile Boolean mv_repair_enabled; /** * Splitter implementation to use for generating repair assignments. 
@@ -464,9 +464,9 @@ protected static Options getDefaultOptions() * and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) */ public volatile ParameterizedClass token_range_splitter; - // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. + // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; - // The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. + // The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. public volatile DurationSpec.IntSecondsBound repair_session_timeout; public String toString() diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 8851c99ddb8f..1b81c40f6112 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -116,10 +116,10 @@ * max_tables_per_assignment: The maximum number of tables that can be included in a repair assignment. * This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for * a keyspace. Note that the splitter will avoid batching tables together if they exceed the other - * configuration paramters such as bytes_per_assignment and partitions_per_assignment. + * configuration parameters such as bytes_per_assignment and partitions_per_assignment. * *
  • - * max_bytes_per_schedule: The maximum number of bytes to cover an individiual schedule. This serves + * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to * reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up From b5ddfa8aaf8da83ef52a3fe77651729829846d74 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 15 Jan 2025 20:34:01 -0800 Subject: [PATCH 123/257] cr from Andy --- conf/cassandra.yaml | 207 +++++++++--------- conf/cassandra_latest.yaml | 207 +++++++++--------- .../cassandra/pages/auto-repair/overview.adoc | 158 +++++++++---- .../repair/autorepair/AutoRepairConfig.java | 134 +++++++----- .../repair/autorepair/AutoRepairUtils.java | 2 +- .../cassandra/service/AutoRepairService.java | 2 +- .../org/apache/cassandra/tools/NodeProbe.java | 2 +- .../tools/nodetool/GetAutoRepairConfig.java | 2 +- .../tools/nodetool/SetAutoRepairConfig.java | 6 +- .../config/YamlConfigurationLoaderTest.java | 2 +- .../autorepair/AutoRepairConfigTest.java | 65 +++++- .../AutoRepairParameterizedTest.java | 26 +-- .../service/AutoRepairServiceSetterTest.java | 2 +- .../nodetool/SetAutoRepairConfigTest.java | 2 +- 14 files changed, 493 insertions(+), 324 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 7b6470a7ca76..3bb32f41e2f8 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,103 +2675,110 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s -# Configuration for AutoRepair Scheduler -#auto_repair: -# # Enable/Disable auto repair scheduler. -# # If it is set to false, then the scheduler thread will not be spun up. 
-# # If it is set to true, then the repair scheduler thread will be created. The thread will -# # look for secondary config available for each repair type (full, incremental, and preview_repaired), -# # and based on that, it will schedule repairs. -# enabled: false -# # Time interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if -# # none is going, then check if it's time to schedule or wait. -# repair_check_interval: 5m -# # Maximum number of retries for a repair session. -# repair_max_retries: 3 -# # Backoff time in seconds for retrying a repair session. -# repair_retry_backoff: 30s -# # Minimum duration for the execution of a single repair task (i.e.: RepairRunnable). -# # This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of -# # repair tasks in a short period of time. -# repair_task_min_duration: 5s -# # When any nodes leave the ring then the repair schedule needs to adjust the order, etc. -# # The AutoRepair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. -# # This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. -# # So, the duration in this config determinses for how long deleted host's information is kept in the scheduler's metadata. -# history_clear_delete_hosts_buffer_interval: 2h -# # Configuration specific to each repair type. Options: full, incremental, and preview_repaired. -# repair_type_overrides: -# full: -# # Enable/Disable full auto repair -# enabled: false -# # Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes -# # repair quicly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. 
On a given node it won’t start a new repair round -# # until the last repair conducted on a given node is < 24 hours. -# min_repair_interval: 24h -# # Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. -# repair_by_keyspace: false -# # Number of repair threads to run for a given invoked Repair Job. -# # Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. -# # This is similar to -j for repair options for the nodetool repair command. -# number_of_repair_threads: 1 -# # Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. -# # This is to avoid penalizing good tables (neighbors) with an outlier. -# sstable_upper_threshold: 10000 -# # Max time for repairing one table on a given node, if exceeded, skip the table. -# # Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. -# # Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. -# # Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. -# # So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. -# table_max_repair_time: 6h -# # This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. -# # If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. 
-# ignore_dcs: [] -# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. -# # It is the same as -pr in nodetool repair options. -# repair_primary_token_range_only: true -# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. -# # This configuration controls how many nodes would run repair in parallel. -# # The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. -# # If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. -# # It will ensure the maximum number of nodes running repair do not exceed “3”. -# parallel_repair_count: 3 -# # The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. -# # The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, -# # the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. -# # The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. -# # So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. -# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. -# parallel_repair_percentage: 3 -# # If the scheduler should repair MV table or not. 
-# mv_repair_enabled: false -# # After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. -# initial_scheduler_delay: 5m -# # The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. -# repair_session_timeout: 3h -# # Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. -# force_repair_new_node: false -# # Splitter implementation to use for generating repair assignments. -# # The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} -# # and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) -# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter -# # The target and maximum amount of bytes that should be included in a repair -# # assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this -# # involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being -# # repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction -# # that is expected. For all other repair types, this involves the amount of data covering the range being -# # repaired. -# token_range_splitter.bytes_per_assignment: 200GiB -# # The target number of partitions that should be included in a repair -# # assignment. This configuration exists to reduce excessive overstreaming. -# token_range_splitter.partitions_per_assignment: 1048576 -# # The maximum number of tables that can be included in a repair assignment. -# # This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for -# # a keyspace. 
Note that the splitter will avoid batching tables together if they exceed the other -# # configuration parameters such as bytes_per_assignment and partitions_per_assignment -# token_range_splitter.max_tables_per_assignment: 64 -# # The maximum number of bytes to cover an individual schedule. This serves -# # as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to -# # reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if -# # writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up -# # the min_repair_interval. -# token_range_splitter.max_bytes_per_schedule: 100000GiB +# Configuration for AutoRepair Scheduler. +# auto_repair: +# # Enable/Disable the auto-repair scheduler. +# # If set to false, the scheduler thread will not be started. +# # If set to true, the repair scheduler thread will be created. The thread will +# # check for secondary configuration available for each repair type (full, incremental, +# # and preview_repaired), and based on that, it will schedule repairs. +# enabled: true +# repair_type_overrides: +# full: +# # Enable/Disable full auto-repair +# enabled: true +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, such as +# # clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler +# # completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node +# # until 24 hours have passed since the last repair. +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# # The target and maximum amount of bytes that should be included in a repair +# # assignment. This scopes the amount of work involved in a repair and includes +# # the data covering the range being repaired. 
+# bytes_per_assignment: 200GiB +# # The maximum number of bytes to cover in an individual schedule. This serves as +# # a mechanism to throttle the work done in each repair cycle. You may reduce this +# # value if the impact of repairs is causing too much load on the cluster or increase it +# # if writes outpace the amount of data being repaired. Alternatively, adjust the +# # min_repair_interval. +# max_bytes_per_schedule: 100000GiB +# incremental: +# enabled: true +# # Incremental repairs finish quickly, so they can be run more frequently. The default is 1 hour. +# min_repair_interval: 1h +# token_range_splitter: +# parameters: +# # Configured to attempt repairing 50GiB of data per repair. +# # This throttles the amount of incremental repair and anticompaction done per schedule +# # after incremental repairs are turned on. +# bytes_per_assignment: 50GiB +# # The maximum number of bytes to cover in an individual schedule to 100GiB. +# # Consider increasing max_bytes_per_schedule if +# # more data is written than this limit within the min_repair_interval. +# max_bytes_per_schedule: 100GiB +# preview_repaired: +# enabled: true +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# bytes_per_assignment: 200GiB +# max_bytes_per_schedule: 100000GiB +# # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule +# # repairs. +# repair_check_interval: 5m +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. +# repair_retry_backoff: 30s +# # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming +# # the node by scheduling too many repair tasks in a short period of time. +# repair_task_min_duration: 5s +# # The scheduler needs to adjust its order when nodes leave the ring. 
Deleted hosts are tracked in metadata +# # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. +# history_clear_delete_hosts_buffer_interval: 2h +# global_settings: +# # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired +# # individually. +# repair_by_keyspace: false +# # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool +# # repair. +# number_of_repair_threads: 1 +# # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. +# parallel_repair_count: 3 +# # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value +# # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. +# parallel_repair_percentage: 3 +# # Repairs materialized view if true. +# materialized_view_repair_enabled: false +# # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. +# initial_scheduler_delay: 5m +# # Timeout for resuming stuck repair sessions. +# repair_session_timeout: 3h +# # Force immediate repair on new nodes after they join the ring. +# force_repair_new_node: false +# # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good +# # tables. +# sstable_upper_threshold: 10000 +# # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the +# # next table. +# table_max_repair_time: 6h +# # Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data +# # centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded +# # data centers. 
Useful if you have keyspaces that are not replicated in certain data centers, and you do not want +# # to run the repair schedule in those data centers. +# ignore_dcs: [] +# # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults +# # to true. General advice is to keep this true. +# repair_primary_token_range_only: true +# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. +# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# token_range_splitter: +# parameters: +# # Target number of partitions to include in a repair assignment to reduce excessive overstreaming. +# partitions_per_assignment: 1048576 +# # Maximum number of tables to include in a repair assignment. This reduces the number of repairs, +# # especially in keyspaces with many tables. The splitter avoids batching tables together if they +# # exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. +# max_tables_per_assignment: 64 diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index dfd2bb504d0a..46cb642ef04d 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2363,103 +2363,110 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE -# Configuration for AutoRepair Scheduler -#auto_repair: -# # Enable/Disable auto repair scheduler. -# # If it is set to false, then the scheduler thread will not be spun up. -# # If it is set to true, then the repair scheduler thread will be created. The thread will -# # look for secondary config available for each repair type (full, incremental, and preview_repaired), -# # and based on that, it will schedule repairs. -# enabled: false -# # Time interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if -# # none is going, then check if it's time to schedule or wait.
-# repair_check_interval: 5m -# # Maximum number of retries for a repair session. -# repair_max_retries: 3 -# # Backoff time in seconds for retrying a repair session. -# repair_retry_backoff: 30s -# # Minimum duration for the execution of a single repair task (i.e.: RepairRunnable). -# # This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of -# # repair tasks in a short period of time. -# repair_task_min_duration: 5s -# # When any nodes leave the ring then the repair schedule needs to adjust the order, etc. -# # The AutoRepair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. -# # This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. -# # So, the duration in this config determinses for how long deleted host's information is kept in the scheduler's metadata. -# history_clear_delete_hosts_buffer_interval: 2h -# # Configuration specific to each repair type. Options: full, incremental, and preview_repaired. -# repair_type_overrides: -# full: -# # Enable/Disable full auto repair -# enabled: false -# # Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes -# # repair quicly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round -# # until the last repair conducted on a given node is < 24 hours. -# min_repair_interval: 24h -# # Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. -# repair_by_keyspace: false -# # Number of repair threads to run for a given invoked Repair Job. 
-# # Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. -# # This is similar to -j for repair options for the nodetool repair command. -# number_of_repair_threads: 1 -# # Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. -# # This is to avoid penalizing good tables (neighbors) with an outlier. -# sstable_upper_threshold: 10000 -# # Max time for repairing one table on a given node, if exceeded, skip the table. -# # Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. -# # Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. -# # Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. -# # So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. -# table_max_repair_time: 6h -# # This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. -# # If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. -# ignore_dcs: [] -# # Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. -# # It is the same as -pr in nodetool repair options. -# repair_primary_token_range_only: true -# # The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. 
-# # This configuration controls how many nodes would run repair in parallel. -# # The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. -# # If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. -# # It will ensure the maximum number of nodes running repair do not exceed “3”. -# parallel_repair_count: 3 -# # The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. -# # The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, -# # the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. -# # The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. -# # So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. -# # Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. -# parallel_repair_percentage: 3 -# # If the scheduler should repair MV table or not. -# mv_repair_enabled: false -# # After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. -# initial_scheduler_delay: 5m -# # The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. -# repair_session_timeout: 3h -# # Whether to force immediate repair on new nodes. 
This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. -# force_repair_new_node: false -# # Splitter implementation to use for generating repair assignments. -# # The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} -# # and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) -# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter -# # The target and maximum amount of bytes that should be included in a repair -# # assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this -# # involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being -# # repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction -# # that is expected. For all other repair types, this involves the amount of data covering the range being -# # repaired. -# token_range_splitter.bytes_per_assignment: 200GiB -# # The target number of partitions that should be included in a repair -# # assignment. This configuration exists to reduce excessive overstreaming. -# token_range_splitter.partitions_per_assignment: 1048576 -# # The maximum number of tables that can be included in a repair assignment. -# # This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for -# # a keyspace. Note that the splitter will avoid batching tables together if they exceed the other -# # configuration parameters such as bytes_per_assignment and partitions_per_assignment -# token_range_splitter.max_tables_per_assignment: 64 -# # The maximum number of bytes to cover an individual schedule. This serves -# # as a mechanism for throttling the amount of work that can be done on each repair cycle. 
One may opt to -# # reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if -# # writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up -# # the min_repair_interval. -# token_range_splitter.max_bytes_per_schedule: 100000GiB +# Configuration for AutoRepair Scheduler. +# auto_repair: +# # Enable/Disable the auto-repair scheduler. +# # If set to false, the scheduler thread will not be started. +# # If set to true, the repair scheduler thread will be created. The thread will +# # check for secondary configuration available for each repair type (full, incremental, +# # and preview_repaired), and based on that, it will schedule repairs. +# enabled: true +# repair_type_overrides: +# full: +# # Enable/Disable full auto-repair +# enabled: true +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, such as +# # clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler +# # completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node +# # until 24 hours have passed since the last repair. +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# # The target and maximum amount of bytes that should be included in a repair +# # assignment. This scopes the amount of work involved in a repair and includes +# # the data covering the range being repaired. +# bytes_per_assignment: 200GiB +# # The maximum number of bytes to cover in an individual schedule. This serves as +# # a mechanism to throttle the work done in each repair cycle. You may reduce this +# # value if the impact of repairs is causing too much load on the cluster or increase it +# # if writes outpace the amount of data being repaired. Alternatively, adjust the +# # min_repair_interval. 
+# max_bytes_per_schedule: 100000GiB +# incremental: +# enabled: true +# # Incremental repairs finish quickly, so they can be run more frequently. The default is 1 hour. +# min_repair_interval: 1h +# token_range_splitter: +# parameters: +# # Configured to attempt repairing 50GiB of data per repair. +# # This throttles the amount of incremental repair and anticompaction done per schedule +# # after incremental repairs are turned on. +# bytes_per_assignment: 50GiB +# # Limits the number of bytes covered in an individual schedule to 100GiB. +# # Consider increasing max_bytes_per_schedule if +# # more data is written than this limit within the min_repair_interval. +# max_bytes_per_schedule: 100GiB +# preview_repaired: +# enabled: true +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# bytes_per_assignment: 200GiB +# max_bytes_per_schedule: 100000GiB +# # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule +# # repairs. +# repair_check_interval: 5m +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. +# repair_retry_backoff: 30s +# # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming +# # the node by scheduling too many repair tasks in a short period of time. +# repair_task_min_duration: 5s +# # The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata +# # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. +# history_clear_delete_hosts_buffer_interval: 2h +# global_settings: +# # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired +# # individually. +# repair_by_keyspace: false +# # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool +# # repair.
+# number_of_repair_threads: 1 +# # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. +# parallel_repair_count: 3 +# # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value +# # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. +# parallel_repair_percentage: 3 +# # Repairs materialized view if true. +# materialized_view_repair_enabled: false +# # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. +# initial_scheduler_delay: 5m +# # Timeout for resuming stuck repair sessions. +# repair_session_timeout: 3h +# # Force immediate repair on new nodes after they join the ring. +# force_repair_new_node: false +# # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good +# # tables. +# sstable_upper_threshold: 10000 +# # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the +# # next table. +# table_max_repair_time: 6h +# # Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data +# # centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded +# # data centers. Useful if you have keyspaces that are not replicated in certain data centers, and you do not want +# # to run the repair schedule in those data centers. +# ignore_dcs: [] +# # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults +# # to true. General advice is to keep this true. +# repair_primary_token_range_only: true +# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter.
+# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# token_range_splitter: +# parameters: +# # Target number of partitions to include in a repair assignment to reduce excessive overstreaming. +# partitions_per_assignment: 1048576 +# # Maximum number of tables to include in a repair assignment. This reduces the number of repairs, +# # especially in keyspaces with many tables. The splitter avoids batching tables together if they +# # exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. +# max_tables_per_assignment: 64 diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc index 4305e63a9810..d9961a5e752a 100644 --- a/doc/modules/cassandra/pages/auto-repair/overview.adoc +++ b/doc/modules/cassandra/pages/auto-repair/overview.adoc @@ -3,47 +3,129 @@ :description: Auto Repair concepts - How it works, how to configure it, and more. :keywords: CEP-37 -Repair orchestration inside Apache Cassandra. This fully Automated Repair scheduler inside Cassandra does not depend on the control plane, significantly reducing our operational overhead. -At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler inside Cassandra maintains a new replicated table under a distributed system_distributed keyspace. This table maintains the repair history for all the nodes, such as when it was repaired the last time, etc. The scheduler will pick the node(s) that run the repair first and continue orchestration to ensure Every table and all of their token ranges are repaired. The algorithm can also run repairs simultaneously on multiple nodes and splits the token range into subranges with the necessary retry to handle transient failures. The automatic repair runs as soon as we start a Cassandra cluster, like Compaction, and does not require human-in-the-loop. 
-The scheduler supports Full, Incremental, and Preview repair types today with the following features. A new repair type, such as Paxos repair and any future repair types, should be extended with minimal dev work! +Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This +significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and +manage repairs. + +At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler in Cassandra +maintains a new replicated table, system_distributed.auto_repair_history, which stores the repair history for all nodes, +including details such as the last repair time. The scheduler selects the node(s) to begin repairs and orchestrates the +process to ensure that every table and its token ranges are repaired. The algorithm can run repairs simultaneously on +multiple nodes and splits token ranges into subranges, with necessary retries to handle transient failures. Automatic +repair starts as soon as a Cassandra cluster is launched, similar to compaction, and does not require human +intervention. + +The scheduler currently supports Full, Incremental, and Preview repair types with the following features. New repair +types, such as Paxos repair or other future repair mechanisms, can be integrated with minimal development effort! 
+ === Features -- Capability to run on multiple nodes simultaneously -- Default implementation and an interface to override the data set being repaired per repair session -- Ability to extend the token split algorithms with the following two implementations readily available: -- Splits token ranges by putting a max cap on the size of data being repaired as part of one repair session as well as a max cap at a table level - this is crucial for Incremental Repair (Default) -- Split tokens evenly based on the number of splits specified -- A new CQL table property to do the following: -- Disable repair type at a table level if one wants the scheduler to skip one or more tables -- Setting repair priority for certain tables to prioritize those tables over others -- Enabling/Disabling scheduler for each repair type dynamically -- Per Data Center per job configuration -- Rich Configuration for each repair type, e.g., full, incremental, or preview_repair -- Enough observability so an operator can configure alarms depending on their need +- Capability to run repairs on multiple nodes simultaneously. +- A default implementation and an interface to override the dataset being repaired per session. +- Extendable token split algorithms with two implementations readily available: +. Splits token ranges by placing a cap on the size of data repaired in one session and a maximum cap at the schedule +level (default). +. Splits tokens evenly based on the specified number of splits. +- A new CQL table property offering: +. The ability to disable specific repair types at the table level, allowing the scheduler to skip one or more tables. +. Configuring repair priorities for certain tables to prioritize them over others. +- Dynamic enablement or disablement of the scheduler for each repair type. +- Configurable settings tailored to each repair job. +- Rich configuration options for each repair type (e.g., Full, Incremental, or Preview repairs). 
+- Comprehensive observability features that allow operators to configure alarms as needed. === Yaml Configuration + +==== Top level settings +The following settings are defined at the top level of the configuration file and apply universally across all +repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Enable/Disable the auto-repair scheduler. If set to false, the scheduler thread will not be started. +If set to true, the repair scheduler thread will be created. The thread will check for secondary configuration available +for each repair type (full, incremental, and preview_repaired), and based on that, it will schedule repairs. +| repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it +is time to schedule repairs. +| repair_max_retries | 3 | Maximum number of retries for a repair session. +| repair_retry_backoff | 30s | Backoff time before retrying a repair session. +| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the +scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. +| history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. +Deleted hosts are tracked in metadata for a specified duration to ensure they are indeed removed before adjustments +are made to the schedule. +|=== + + +==== Repair level settings +The following settings can be tailored individually for each repair type. [cols=",,",options="header",] +|=== | Name | Default | Description -| enabled | false | Enable/Disable auto repair scheduler -| min_repair_interval | 24h | Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes. repair quicly. The default is 24 hours.
This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round until the last repair conducted on a given node is < 24 hours. -| repair_by_keyspace | false | Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. -| number_of_repair_threads | 1 | Number of repair threads to run for a given invoked Repair Job. Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. This is similar to -j for repair options for the nodetool repair command. -| sstable_upper_threshold | 10000 | Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. This is to avoid penalizing good tables (neighbors) with an outlier. -| table_max_repair_time | 6h | Max time for repairing one table on a given node, if exceeded, skip the table. Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. Out of these 10 tables, 1 table is humongous. Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. -| ignore_dcs | [] | This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. 
-| repair_primary_token_range_only | true | Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. It is the same as -pr in nodetool repair options. -| parallel_repair_count | 3 | The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. This configuration controls how many nodes would run repair in parallel. The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. It will ensure the maximum number of nodes running repair do not exceed “3”. -| parallel_repair_percentage | 3 | The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment,the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. -| mv_repair_enabled | false | If the scheduler should repair MV table or not. -| initial_scheduler_delay | 5m | After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. 
-| repair_session_timeout | 3h | The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. -| force_repair_new_node | false | Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. -| token_range_splitter | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Splitter implementation to use for generating repair assignments.The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) -| token_range_splitter.bytes_per_assignment | 200GiB |The target and maximum amount of bytes that should be included in a repair assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction that is expected. For all other repair types, this involves the amount of data covering the range being repaired. -| token_range_splitter.partitions_per_assignment | 1048576 | he target number of partitions that should be included in a repair assignment. This configuration exists to reduce excessive overstreaming. -| token_range_splitter.max_tables_per_assignment | 64 | The maximum number of tables that can be included in a repair assignment. This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for a keyspace. Note that the splitter will avoid batching tables together if they exceed the other configuration parameters such as bytes_per_assignment and partitions_per_assignment -| token_range_splitter.max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover an individual schedule. 
This serves as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up the min_repair_interval. +| enabled | false | Enable/Disable full/incremental/preview_repaired auto-repair +| repair_by_keyspace | false | If true, attempts to group tables in the same keyspace into one repair; otherwise, +each table is repaired individually. +| number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler. Similar to +the -j option in nodetool repair. +| parallel_repair_count | 3 | Number of nodes running repair in parallel. If parallel_repair_percentage is set, the +larger value is used. +| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If +parallel_repair_count is set, the larger value is used. Recommendation is that the repair cycle on the cluster should +finish within gc_grace_seconds. +| materialized_view_repair_enabled | false | Repairs materialized view if true. +| initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting +immediately after a restart. +| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. +| force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. +| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 +SSTables to avoid penalizing good tables. +| table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair +proceeds to the next table. +| ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify +data centers to exclude in this list. 
Note that repair sessions will still consider all replicas from excluded data +centers. Useful if you have keyspaces that are not replicated in certain data centers, and you do not want to run the repair +schedule in those data centers. +| repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option +in nodetool repair. Defaults to true. General advice is to keep this true. +| token_range_splitter | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Splitter implementation to +generate repair assignments. Defaults to RepairTokenRangeSplitter. +| token_range_splitter.partitions_per_assignment | 1048576 | Target number of partitions to include in a repair +assignment to reduce excessive overstreaming. +| token_range_splitter.max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment. +This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables +together if they exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. +|=== + +==== Full & Preview_Repaired repair level settings +The following defaults apply to the full and preview_repaired repair types. +[cols=",,",options="header",] +|=== +| Name | Default | Description +| min_repair_interval | 24h | Minimum duration between repairing the same node again. This is useful for tiny clusters, +such as clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler +completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node until 24 +hours have passed since the last repair. +| token_range_splitter.bytes_per_assignment | 200GiB | The target and maximum amount of bytes that should be included +in a repair assignment. This scopes the amount of work involved in a repair and includes the data covering the range +being repaired.
+| token_range_splitter.max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual +schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the +impact of repairs is causing too much load on the cluster or increase it if writes outpace the amount of data being +repaired. Alternatively, adjust the min_repair_interval. +|=== + +==== Incremental repair level settings +The following settings can be customized for each repair type. +[cols=",,",options="header",] +|=== +| min_repair_interval | 1h | Incremental repairs finish quickly, so they can be run more frequently. The default is +1 hour. +| token_range_splitter.bytes_per_assignment | 50GiB | Configured to attempt repairing 50GiB of data per repair. +This throttles the amount of incremental repair and anticompaction done per schedule after incremental repairs are +turned on. +| token_range_splitter.max_bytes_per_schedule | 100GiB | Limits the number of bytes covered in an individual schedule +to 100GiB. Consider increasing max_bytes_per_schedule if more data is written than this limit within +the min_repair_interval.
|=== @@ -68,7 +150,7 @@ configuration for repair_type: full repair_primary_token_range_only: true parallel_repair_count: 3 parallel_repair_percentage: 3 - mv_repair_enabled: false + materialized_view_repair_enabled: false initial_scheduler_delay: 5m repair_session_timeout: 3h force_repair_new_node: false @@ -79,7 +161,7 @@ configuration for repair_type: full token_range_splitter.max_bytes_per_schedule: 100000GiB configuration for repair_type: incremental enabled: true - min_repair_interval: 24h + min_repair_interval: 1h repair_by_keyspace: false number_of_repair_threads: 1 sstable_upper_threshold: 10000 @@ -88,7 +170,7 @@ configuration for repair_type: incremental repair_primary_token_range_only: true parallel_repair_count: 3 parallel_repair_percentage: 3 - mv_repair_enabled: false + materialized_view_repair_enabled: false initial_scheduler_delay: 5m repair_session_timeout: 3h force_repair_new_node: false @@ -122,4 +204,4 @@ $> nodetool setautorepairconfig -t incremental number_of_repair_threads 2 ==== More details -https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Automated+Repair+Solution[CEP-37] \ No newline at end of file +https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution[CEP-37] \ No newline at end of file diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 2d18851f58b2..684a10db2e14 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -40,26 +40,24 @@ public class AutoRepairConfig implements Serializable { - // Enable/Disable auto repair scheduler. If it is set to false, then the scheduler thread will not be spun up. - // If it is set to true, then the repair scheduler thread will be created. 
The thread will - // look for secondary config available for each repair type (full, incremental, and preview_repaired), - // and based on that, it will schedule repairs. + // Enable/Disable the auto-repair scheduler. + // If set to false, the scheduler thread will not be started. + // If set to true, the repair scheduler thread will be created. The thread will + // check for secondary configuration available for each repair type (full, incremental, + // and preview_repaired), and based on that, it will schedule repairs. public volatile Boolean enabled; - // Time interval between successive checks for repair scheduler to check if either the ongoing repair is completed or if - // none is going, then check if it's time to schedule or wait. + // Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule + // repairs. public final DurationSpec.IntSecondsBound repair_check_interval = new DurationSpec.IntSecondsBound("5m"); - // When any nodes leave the ring then the repair schedule needs to adjust the order, etc. - // The AutoRepair scheduler keeps the deleted hosts information in its persisted metadata for the defined interval in this config. - // This information is useful so the scheduler is absolutely sure that the node is indeed removed from the ring, and then it can adjust the repair schedule accordingly. - // So, the duration in this config determinses for how long deleted host's information is kept in the scheduler's metadata. + // The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata + // for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); // Maximum number of retries for a repair session. 
public volatile Integer repair_max_retries = 3; - // Backoff time in seconds for retrying a repair session. + // Backoff time before retrying a repair session. public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); - // Minimum duration for the execution of a single repair task (i.e.: RepairRunnable). - // This helps prevent the auto-repair scheduler from overwhelming the node by scheduling a large number of - // repair tasks in a short period of time. + // Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming + // the node by scheduling too many repair tasks in a short period of time. public volatile DurationSpec.LongSecondsBound repair_task_min_duration = new DurationSpec.LongSecondsBound("5s"); // global_settings overides Options.defaultOptions for all repair types @@ -279,14 +277,14 @@ public void setParallelRepairCount(RepairType repairType, int count) getOptions(repairType).parallel_repair_count = count; } - public boolean getMVRepairEnabled(RepairType repairType) + public boolean getMaterializedViewRepairEnabled(RepairType repairType) { - return applyOverrides(repairType, opt -> opt.mv_repair_enabled); + return applyOverrides(repairType, opt -> opt.materialized_view_repair_enabled); } - public void setMVRepairEnabled(RepairType repairType, boolean enabled) + public void setMaterializedViewRepairEnabled(RepairType repairType, boolean enabled) { - getOptions(repairType).mv_repair_enabled = enabled; + getOptions(repairType).materialized_view_repair_enabled = enabled; } public void setForceRepairNewNode(RepairType repairType, boolean forceRepairNewNode) @@ -383,7 +381,38 @@ public static class Options implements Serializable { // defaultOptions defines the default auto-repair behavior when no overrides are defined @VisibleForTesting - protected static final Options defaultOptions = getDefaultOptions(); + private static Map defaultOptions; + + private 
static Map initializeDefaultOptions() + { + Map options = new EnumMap<>(AutoRepairConfig.RepairType.class); + options.put(AutoRepairConfig.RepairType.FULL, getDefaultOptions()); + options.put(RepairType.INCREMENTAL, getDefaultOptions()); + options.put(RepairType.PREVIEW_REPAIRED, getDefaultOptions()); + + + options.get(RepairType.FULL).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); + // Incremental repairs finish quickly, so they can be run more frequently. Hence, the default is 1 hour. + options.get(RepairType.INCREMENTAL).min_repair_interval = new DurationSpec.IntSecondsBound("1h"); + options.get(RepairType.PREVIEW_REPAIRED).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); + + return options; + } + + public static Map getDefaultOptionsMap() + { + if (defaultOptions == null) + { + synchronized (AutoRepairConfig.class) + { + if (defaultOptions == null) + { + defaultOptions = initializeDefaultOptions(); + } + } + } + return defaultOptions; + } public Options() { @@ -400,12 +429,11 @@ protected static Options getDefaultOptions() opts.parallel_repair_count = 3; opts.parallel_repair_percentage = 3; opts.sstable_upper_threshold = 10000; - opts.min_repair_interval = new DurationSpec.IntSecondsBound("24h"); opts.ignore_dcs = new HashSet<>(); opts.repair_primary_token_range_only = true; opts.force_repair_new_node = false; opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); - opts.mv_repair_enabled = false; + opts.materialized_view_repair_enabled = false; opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours @@ -413,50 +441,42 @@ protected static Options getDefaultOptions() return opts; } - // Enable/Disable full auto repair + // Enable/Disable full or incremental or preview_repaired auto repair public volatile
Boolean enabled; - // Repair table by table if this is set to 'false'. If it is 'true', then repair all the tables in a keyspace in one go. + // If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired + // individually. public volatile Boolean repair_by_keyspace; - // Number of repair threads to run for a given invoked Repair Job. - // Once the scheduler schedules one repair session, then howmany threads to use inside that job will be controlled through this parameter. - // This is similar to -j for repair options for the nodetool repair command. + // Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool + // repair. public volatile Integer number_of_repair_threads; - // The number of nodes running repair parallelly. If parallel_repair_percentage is set, it will choose the larger value of the two. - // This configuration controls how many nodes would run repair in parallel. - // The value “3” means, at any given point in time, at most 3 nodes would be running repair in parallel. These selected nodes can be from any datacenters. - // If one or more node(s) finish repair, then the framework automatically picks up the next candidate(s) based on the least repair time. - // It will ensure the maximum number of nodes running repair do not exceed “3”. + // Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. public volatile Integer parallel_repair_count; - // The percentage of nodes in the cluster that run repair parallelly. If parallel_repair_count is set, it will choose the larger value of the two. 
- // The problem with a fixed number of nodes (parallel_repair_count property) is that in a large-scale environment, - // the nodes keep getting added/removed due to elasticity, so if we have a fixed number, then manual interventions would increase because, on a continuous basis,operators would have to adjust to meet the SLA. - // The default is 3%, which means that 3% of the nodes in the Cassandra cluster would be repaired in parallel. - // So now, if a fleet, an operator won't have to worry about changing the repair frequency, etc., as overall repair time will continue to remain the same even if nodes are added or removed due to elasticity. - // Extremely fewer manual interventions as it will rarely violate the repair SLA for customers. + // Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value + // is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. public volatile Integer parallel_repair_percentage; - // Threshold to skip a table if it has too many sstables. The default is 10000. This means, if a table on a node has 10000 or more SSTables, then that table will be skipped. - // This is to avoid penalizing good tables (neighbors) with an outlier. + // Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good + // tables. public volatile Integer sstable_upper_threshold; - // Minimum time in hours between repairing the same node again. This is useful for extremely tiny clusters, say 5 nodes, which finishes - // repair quicly. The default is 24 hours. This means that if the scheduler finishes one round on all the nodes in < 24 hours. On a given node it won’t start a new repair round - // until the last repair conducted on a given node is < 24 hours. + // Minimum duration between repairing the same node again. This is useful for tiny clusters, such as + // clusters with 5 nodes that finish repairs quickly. 
The default is 24 hours. This means that if the scheduler + // completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node + // until 24 hours have passed since the last repair. public volatile DurationSpec.IntSecondsBound min_repair_interval; - // This is useful if you want to completely avoid running repairs in one or more data centers. By default, it is empty, i.e., the framework will repair nodes in all the datacenters. - // If you want to avoid repairing nodes in one or more data centers, then you can specify the data centers in this list. + // Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data + // centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded + // data centers. Useful if you have keyspaces that are not replicated in certain data centers and you do not want + // the repair schedule to run in those data centers. public volatile Set ignore_dcs; - // Set this 'true' if AutoRepair should repair only the primary ranges owned by this node; else, 'false'. - // It is the same as -pr in nodetool repair options. + // Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults + // to true. General advice is to keep this true. public volatile Boolean repair_primary_token_range_only; - // Whether to force immediate repair on new nodes. This is useful if you want to repair newly bootstrapped nodes immediately after they join the ring. + // Force immediate repair on new nodes after they join the ring. public volatile Boolean force_repair_new_node; - // Max time for repairing one table on a given node, if exceeded, skip the table. - // Let's say there is a Cassandra cluster in that there are 10 tables belonging to 10 different customers. - // Out of these 10 tables, 1 table is humongous.
Repairing this 1 table, say, takes 5 days, in the worst case, but others could finish in just 1 hour. - // Then we would penalize 9 customers just because of one bad actor, and those 9 customers would ping an operator and would require a lot of back-and-forth manual interventions, etc. - // So, the idea here is to penalize the outliers instead of good candidates. This can easily be configured with a higher value if we want to disable the functionality. + // Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the + // next table. public volatile DurationSpec.IntSecondsBound table_max_repair_time; - // If the scheduler should repair MV table or not. - public volatile Boolean mv_repair_enabled; + // Repairs materialized view if true. + public volatile Boolean materialized_view_repair_enabled; /** * Splitter implementation to use for generating repair assignments. *

    @@ -466,7 +486,7 @@ protected static Options getDefaultOptions() public volatile ParameterizedClass token_range_splitter; // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; - // The major issue with Repair is sometimes repair session hangs; so this timeout is useful to resume such stuck repair sessions. + // Timeout for resuming stuck repair sessions. public volatile DurationSpec.IntSecondsBound repair_session_timeout; public String toString() @@ -483,7 +503,7 @@ public String toString() ", repair_primary_token_range_only=" + repair_primary_token_range_only + ", force_repair_new_node=" + force_repair_new_node + ", table_max_repair_time=" + table_max_repair_time + - ", mv_repair_enabled=" + mv_repair_enabled + + ", materialized_view_repair_enabled=" + materialized_view_repair_enabled + ", token_range_splitter=" + token_range_splitter + ", intial_scheduler_delay=" + initial_scheduler_delay + ", repair_session_timeout=" + repair_session_timeout + @@ -522,6 +542,6 @@ protected T applyOverrides(RepairType repairType, Function optio } // Otherwise check defaults - return getOverride(Options.defaultOptions, optionSupplier); + return getOverride(Options.getDefaultOptionsMap().get(repairType), optionSupplier); } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 29204670028b..a35542186561 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -835,7 +835,7 @@ public static boolean keyspaceMaxRepairTimeExceeded(RepairType repairType, long public static List getAllMVs(RepairType repairType, Keyspace keyspace, TableMetadata tableMetadata) { List allMvs = new ArrayList<>(); - if 
(AutoRepairService.instance.getAutoRepairConfig().getMVRepairEnabled(repairType) && keyspace.getMetadata().views != null) + if (AutoRepairService.instance.getAutoRepairConfig().getMaterializedViewRepairEnabled(repairType) && keyspace.getMetadata().views != null) { Iterator views = keyspace.getMetadata().views.forTable(tableMetadata.id).iterator(); while (views.hasNext()) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 4056e6e6a930..74a8259a4a3c 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -182,7 +182,7 @@ public void setParallelRepairCount(RepairType repairType, int count) public void setMVRepairEnabled(RepairType repairType, boolean enabled) { - config.setMVRepairEnabled(repairType, enabled); + config.setMaterializedViewRepairEnabled(repairType, enabled); } public void setRepairSessionTimeout(RepairType repairType, String timeout) diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index d11cf02aa76b..e953b7a3d4ce 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2663,7 +2663,7 @@ public void setPrimaryTokenRangeOnly(AutoRepairConfig.RepairType repairType, boo autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); } - public void setMVRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + public void setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) { autoRepairProxy.setMVRepairEnabled(repairType, enabled); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 296049f78408..de234893cb49 100644 --- 
a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -85,7 +85,7 @@ private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, Au appendConfig(sb , "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); appendConfig(sb , "parallel_repair_count", config.getParallelRepairCount(repairType)); appendConfig(sb , "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); - appendConfig(sb , "mv_repair_enabled", config.getMVRepairEnabled(repairType)); + appendConfig(sb , "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index d7f36f801613..52704dfcd8f2 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -47,7 +47,7 @@ public class SetAutoRepairConfig extends NodeToolCmd "[start_scheduler|number_of_repair_threads|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + - "|parallel_repair_count|parallel_repair_percentage|mv_repair_enabled|repair_max_retries" + + "|parallel_repair_count|parallel_repair_percentage|materialized_view_repair_enabled|repair_max_retries" + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration|token_range_splitter.]", required = true) protected List args = new 
ArrayList<>(); @@ -160,8 +160,8 @@ public void execute(NodeProbe probe) case "parallel_repair_percentage": probe.setParallelRepairPercentage(repairType, Integer.parseInt(paramVal)); break; - case "mv_repair_enabled": - probe.setMVRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + case "materialized_view_repair_enabled": + probe.setMaterializedViewRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); break; case "repair_session_timeout": probe.setRepairSessionTimeout(repairType, paramVal); diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 43bb8602485f..b2708b08b81a 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -304,7 +304,7 @@ public void fromMapTest() assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(true, config.auto_repair.enabled); assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.INCREMENTAL)); - config.auto_repair.setMVRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); + config.auto_repair.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 8f02baa52f57..b95b8dd84b21 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -18,6 +18,8 @@ package org.apache.cassandra.repair.autorepair; +import java.util.HashSet; +import java.util.Map; import java.util.Objects; import 
java.util.Collections; import java.util.Set; @@ -39,6 +41,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) @@ -305,11 +308,11 @@ public void testSetParallelRepairCountInGroup() } @Test - public void testGetMVRepairEnabled() + public void testGetMaterializedViewRepairEnabled() { - config.global_settings.mv_repair_enabled = true; + config.global_settings.materialized_view_repair_enabled = true; - boolean result = config.getMVRepairEnabled(repairType); + boolean result = config.getMaterializedViewRepairEnabled(repairType); assertTrue(result); } @@ -317,9 +320,9 @@ public void testGetMVRepairEnabled() @Test public void testSetMVRepairEnabled() { - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); - assertTrue(config.getOptions(repairType).mv_repair_enabled); + assertTrue(config.getOptions(repairType).materialized_view_repair_enabled); } @Test @@ -363,7 +366,7 @@ public void testGetDefaultOptionsMVRepairIsEnabledByDefault() { Options defaultOptions = Options.getDefaultOptions(); - assertFalse(defaultOptions.mv_repair_enabled); + assertFalse(defaultOptions.materialized_view_repair_enabled); } @Test @@ -409,4 +412,54 @@ public void testSetRepairSessionTimeout() assert config.getOptions(repairType).repair_session_timeout.toSeconds() == 3600; } + @Test + public void testDefaultOptions() + { + Map defaultOptions = Options.getDefaultOptionsMap(); + Options options = defaultOptions.get(repairType); + assertFalse(options.enabled); + assertFalse(options.repair_by_keyspace); + assertEquals(Integer.valueOf(1), options.number_of_repair_threads); + assertEquals(Integer.valueOf(3), options.parallel_repair_count); + assertEquals(Integer.valueOf(3), options.parallel_repair_percentage); + assertEquals(Integer.valueOf(10000), 
options.sstable_upper_threshold); + assertEquals(new HashSet<>(), options.ignore_dcs); + assertTrue(options.repair_primary_token_range_only); + assertFalse(options.force_repair_new_node); + assertEquals(new DurationSpec.IntSecondsBound("6h"), options.table_max_repair_time); + assertFalse(options.materialized_view_repair_enabled); + assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), options.token_range_splitter); + assertEquals(new DurationSpec.IntSecondsBound("5m"), options.initial_scheduler_delay); + assertEquals(new DurationSpec.IntSecondsBound("3h"), options.repair_session_timeout); + + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) + { + assertEquals(new DurationSpec.IntSecondsBound("1h"), options.min_repair_interval); + } + else + { + assertEquals(new DurationSpec.IntSecondsBound("24h"), options.min_repair_interval); + } + } + + @Test + public void testGlobalOptions() + { + AutoRepairConfig config = new AutoRepairConfig(); + assertFalse(config.global_settings.enabled); + assertFalse(config.global_settings.repair_by_keyspace); + assertEquals(Integer.valueOf(1), config.global_settings.number_of_repair_threads); + assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_count); + assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_percentage); + assertEquals(Integer.valueOf(10000), config.global_settings.sstable_upper_threshold); + assertEquals(new HashSet<>(), config.global_settings.ignore_dcs); + assertTrue(config.global_settings.repair_primary_token_range_only); + assertFalse(config.global_settings.force_repair_new_node); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.global_settings.table_max_repair_time); + assertFalse(config.global_settings.materialized_view_repair_enabled); + assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), config.global_settings.token_range_splitter); + assertEquals(new 
DurationSpec.IntSecondsBound("5m"), config.global_settings.initial_scheduler_delay); + assertEquals(new DurationSpec.IntSecondsBound("3h"), config.global_settings.repair_session_timeout); + assertNull(config.global_settings.min_repair_interval); + } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a6b60a9f4c1c..2ae468c6170f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -207,7 +207,7 @@ private void resetConfig() for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { defaultConfig.setAutoRepairEnabled(repairType, true); - defaultConfig.setMVRepairEnabled(repairType, false); + defaultConfig.setMaterializedViewRepairEnabled(repairType, false); } // reset the AutoRepairService config to default @@ -368,15 +368,15 @@ public void testRepairPrimaryRangesByDefault() public void testGetAllMVs() { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMVRepairEnabled(repairType, false); - assertFalse(config.getMVRepairEnabled(repairType)); + config.setMaterializedViewRepairEnabled(repairType, false); + assertFalse(config.getMaterializedViewRepairEnabled(repairType)); assertEquals(0, AutoRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); - assertTrue(config.getMVRepairEnabled(repairType)); + assertTrue(config.getMaterializedViewRepairEnabled(repairType)); assertEquals(Arrays.asList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); - config.setMVRepairEnabled(repairType, false); + config.setMaterializedViewRepairEnabled(repairType, false); } @@ -384,20 +384,20 @@ public void testGetAllMVs() public void testMVRepair() { 
AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); AutoRepair.instance.repair(repairType); assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - config.setMVRepairEnabled(repairType, false); + config.setMaterializedViewRepairEnabled(repairType, false); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); AutoRepair.instance.repair(repairType); assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); AutoRepair.instance.repair(repairType); assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); @@ -433,7 +433,7 @@ public void testSkipRepairSSTableCountHigherThreshold() assert diffMVTable.size() == 10; int beforeCount = config.getRepairSSTableCountHigherThreshold(repairType); - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); config.setRepairSSTableCountHigherThreshold(repairType, 9); assertEquals(0, state.getSkippedTokenRangesCount()); assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); @@ -476,7 +476,7 @@ public void testGetRepairState() public void 
testMetrics() { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); config.setRepairRetryBackoff("0s"); config.setAutoRepairTableMaxRepairTime(repairType, "0s"); @@ -514,7 +514,7 @@ public void testMetrics() public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws Exception { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMVRepairEnabled(repairType, false); + config.setMaterializedViewRepairEnabled(repairType, false); config.setRepairRetryBackoff("0s"); when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); @@ -611,7 +611,7 @@ public void testRepairShufflesKeyspacesAndTables() public void testRepairTakesLastRepairTimeFromDB() { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMVRepairEnabled(repairType, true); + config.setMaterializedViewRepairEnabled(repairType, true); long lastRepairTime = System.currentTimeMillis() - 1000; AutoRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index c69809618216..2ac939f4ab16 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -73,7 +73,7 @@ public static Collection testCases() { forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), 
forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), - forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMVRepairEnabled), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, AutoRepairServiceSetterTest::isLocalHostForceRepair) ).flatMap(Function.identity()).collect(Collectors.toList()); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 7fde3920afda..1207186fb6b8 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -284,7 +284,7 @@ public static Collection testCases() forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type, true)), forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type, 6)), forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type, 7)), - forEachRepairType("mv_repair_enabled", "true", (type) -> verify(probe, times(1)).setMVRepairEnabled(type, true)), + forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setMaterializedViewRepairEnabled(type, true)), forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))), forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", 
(type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type, "max_bytes_per_schedule", "500GiB")) ).flatMap(Function.identity()).collect(Collectors.toList()); From 0da1c04257917d251cf71224d2cdef98d4b236f6 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 20 Jan 2025 20:07:43 -0800 Subject: [PATCH 124/257] address cr from Andy --- conf/cassandra.yaml | 14 +++++++--- conf/cassandra_latest.yaml | 14 +++++++--- .../cassandra/pages/auto-repair/overview.adoc | 12 ++++++--- .../repair/autorepair/AutoRepairConfig.java | 6 +++-- .../autorepair/RepairTokenRangeSplitter.java | 2 ++ .../cassandra/service/AutoRepairService.java | 4 +-- .../AutoRepairParameterizedTest.java | 26 +++++++++++++++++++ .../repair/autorepair/AutoRepairTest.java | 12 +++++++++ .../service/AutoRepairServiceBasicTest.java | 18 +++++++++++++ 9 files changed, 92 insertions(+), 16 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 3bb32f41e2f8..82ba965a84f8 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2703,10 +2703,15 @@ storage_compatibility_mode: NONE # # value if the impact of repairs is causing too much load on the cluster or increase it # # if writes outpace the amount of data being repaired. Alternatively, adjust the # # min_repair_interval. +# # This is set to a large value for full repair to attempt to repair all data per repair schedule. # max_bytes_per_schedule: 100000GiB # incremental: # enabled: true -# # Incremental repairs finish quickly, so they can be run more frequently. The default is 1 hour. +# # Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps +# # the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent +# # schedule such as 1h is recommended. 
+# # When turning on incremental repair for the first time with a decent amount of data it may be advisable to +# # increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. # min_repair_interval: 1h # token_range_splitter: # parameters: @@ -2738,6 +2743,7 @@ storage_compatibility_mode: NONE # # The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata # # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. # history_clear_delete_hosts_buffer_interval: 2h +# # NOTE: Each of the below settings can be overridden per repair type under repair_type_overrides # global_settings: # # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired # # individually. @@ -2750,7 +2756,7 @@ storage_compatibility_mode: NONE # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value # # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. # parallel_repair_percentage: 3 -# # Repairs materialized view if true. +# # Repairs materialized views if true. # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. # initial_scheduler_delay: 5m @@ -2772,9 +2778,9 @@ storage_compatibility_mode: NONE # # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults # # to true. General advice is to keep this true. # repair_primary_token_range_only: true -# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. -# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter # token_range_splitter: +# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. 
+# class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter # parameters: # # Target number of partitions to include in a repair assignment to reduce excessive overstreaming. # partitions_per_assignment: 1048576 diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 46cb642ef04d..0b3a0d857a0a 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2391,10 +2391,15 @@ storage_compatibility_mode: NONE # # value if the impact of repairs is causing too much load on the cluster or increase it # # if writes outpace the amount of data being repaired. Alternatively, adjust the # # min_repair_interval. +# # This is set to a large value for full repair to attempt to repair all data per repair schedule. # max_bytes_per_schedule: 100000GiB # incremental: # enabled: true -# # Incremental repairs finish quickly, so they can be run more frequently. The default is 1 hour. +# # Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps +# # the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent +# # schedule such as 1h is recommended. +# # When turning on incremental repair for the first time with a decent amount of data it may be advisable to +# # increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. # min_repair_interval: 1h # token_range_splitter: # parameters: @@ -2426,6 +2431,7 @@ storage_compatibility_mode: NONE # # The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata # # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. 
# history_clear_delete_hosts_buffer_interval: 2h +# # NOTE: Each of the below settings can be overridden per repair type under repair_type_overrides # global_settings: # # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired # # individually. @@ -2438,7 +2444,7 @@ storage_compatibility_mode: NONE # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value # # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. # parallel_repair_percentage: 3 -# # Repairs materialized view if true. +# # Repairs materialized views if true. # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. # initial_scheduler_delay: 5m @@ -2460,9 +2466,9 @@ storage_compatibility_mode: NONE # # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults # # to true. General advice is to keep this true. # repair_primary_token_range_only: true -# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. -# token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter # token_range_splitter: +# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. +# class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter # parameters: # # Target number of partitions to include in a repair assignment to reduce excessive overstreaming. # partitions_per_assignment: 1048576 diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc index d9961a5e752a..1eb54a595ccd 100644 --- a/doc/modules/cassandra/pages/auto-repair/overview.adoc +++ b/doc/modules/cassandra/pages/auto-repair/overview.adoc @@ -73,7 +73,7 @@ larger value is used. 
| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. -| materialized_view_repair_enabled | false | Repairs materialized view if true. +| materialized_view_repair_enabled | false | Repairs materialized views if true. | initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. | repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. @@ -111,15 +111,19 @@ being repaired. | token_range_splitter.max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the impact of repairs is causing too much load on the cluster or increase it if writes outpace the amount of data being -repaired. Alternatively, adjust the min_repair_interval. +repaired. Alternatively, adjust the min_repair_interval. This is set to a large value for full repair to attempt to +repair all data per repair schedule. |=== ==== Incremental repair level settings The following settings can be customized for each repair type. [cols=",,",options="header",] |=== -| min_repair_interval | 1h | Incremental repairs finish quickly, so they can be run more frequently. The default is -1 hour. +| min_repair_interval | 1h | Incremental repairs operate over unrepaired data and should finish quickly. Running them +more frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +so a more frequent schedule such as 1h is recommended. 
When turning on incremental repair for the first time with a +decent amount of data it may be advisable to increase this interval to 24h or longer to reduce the impact of +anticompaction caused by incremental repair. | token_range_splitter.bytes_per_assignment | 50GiB | Configured to attempt repairing 50GiB of data per repair. This throttles the amount of incremental repair and anticompaction done per schedule after incremental repairs are turned on. diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 684a10db2e14..e32374e8dc55 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -392,7 +392,9 @@ private static Map initializeDefaultOption options.get(RepairType.FULL).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); - // Incremental repairs finish quickly, so they can be run more frequently. Hence, the default is 1 hour. + // Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps + // the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent + // schedule such as 1h is recommended. When first enabling it with existing data, consider 24h or longer to limit anticompaction. options.get(RepairType.INCREMENTAL).min_repair_interval = new DurationSpec.IntSecondsBound("1h"); options.get(RepairType.PREVIEW_REPAIRED).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); @@ -475,7 +477,7 @@ protected static Options getDefaultOptions() // Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the // next table. public volatile DurationSpec.IntSecondsBound table_max_repair_time; - // Repairs materialized view if true. + // Repairs materialized views if true. 
public volatile Boolean materialized_view_repair_enabled; /** * Splitter implementation to use for generating repair assignments. diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 1b81c40f6112..850db0f66b6d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -131,6 +131,8 @@ *

  • * full: Configured in a way that attempts to accomplish repairing all data in a schedule, with * individual repairs targeting at most 200GiB of data and 1048576 partitions. + * max_bytes_per_schedule is set to a large value for full repair to attempt to repair all data per + * repair schedule. *
      *
    • bytes_per_assignment: 200GiB
    • *
    • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
    • diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 74a8259a4a3c..9e3b56d18a27 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -65,10 +65,10 @@ public void checkCanRun(RepairType repairType) if (repairType != RepairType.INCREMENTAL) return; - if (DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()) + if (config.getMaterializedViewRepairEnabled(repairType) && DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()) throw new ConfigurationException("Cannot run incremental repair while materialized view replay is enabled. Set materialized_views_on_repair_enabled to false."); - if (DatabaseDescriptor.isCDCOnRepairEnabled()) + if (DatabaseDescriptor.isCDCEnabled() && DatabaseDescriptor.isCDCOnRepairEnabled()) throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled. 
Set cdc_on_repair_enabled to false."); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 2ae468c6170f..9e80bae37e59 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -677,11 +677,36 @@ public void testRepairSuccessAfterRetry() verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); } + @Test + public void testRepairDoesNotThrowsForIRWithMVReplayButMVRepairDisabled() + { + AutoRepair.instance.setup(); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepairService.instance.getAutoRepairConfig().setMaterializedViewRepairEnabled(repairType, false); + + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) + { + try + { + AutoRepair.instance.repair(repairType); + } + catch (ConfigurationException ignored) + { + fail("ConfigurationException not expected"); + } + } + else + { + AutoRepair.instance.repair(repairType); + } + } + @Test public void testRepairThrowsForIRWithMVReplay() { AutoRepair.instance.setup(); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepairService.instance.getAutoRepairConfig().setMaterializedViewRepairEnabled(repairType, true); if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) { @@ -705,6 +730,7 @@ public void testRepairThrowsForIRWithMVReplay() public void testRepairThrowsForIRWithCDCReplay() { AutoRepair.instance.setup(); + DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 283527f61117..0accb33b0d37 100644 --- 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -110,10 +110,22 @@ public void testSetupFailsWhenIREnabledWithCDCReplay() instance.setup(); } + @Test + public void testNoFailureIfMVRepairOnButConfigIsOff() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); + DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, false); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepair instance = new AutoRepair(); + instance.setup(); + } + @Test(expected = ConfigurationException.class) public void testSetupFailsWhenIREnabledWithMVReplay() { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); + DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, true); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); AutoRepair instance = new AutoRepair(); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index f8911acbd3ee..35ceb90a3f1a 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -87,9 +87,18 @@ public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisabled() { + autoRepairService.config = new AutoRepairConfig(true); + autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); + 
DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + } + @Test(expected = ConfigurationException.class) public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { autoRepairService.config = new AutoRepairConfig(true); + autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } @@ -102,10 +111,19 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled() { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + DatabaseDescriptor.setCDCEnabled(false); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + } + @Test(expected = ConfigurationException.class) public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); + DatabaseDescriptor.setCDCEnabled(true); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); } From 6d124c635a39982ff4db29372a2a9b950179462b Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 22 Jan 2025 23:36:45 -0600 Subject: [PATCH 125/257] partitions_per_assignment to not use repair_session_max_tree_depth Updates partitions_per_assignment to not be based on repair_session_max_tree_depth which is deprecated. Instead, just use 2^20. Also updates documentation around partitions_per_assignment and cleans up some warnings in RepairTokenRangeSplitter. 
patch by Andy Tolbert; reviewed by Jaydeep Chovatia for CASSANDRA-20231 --- conf/cassandra.yaml | 3 ++- conf/cassandra_latest.yaml | 3 ++- .../cassandra/pages/auto-repair/overview.adoc | 4 +-- .../autorepair/RepairTokenRangeSplitter.java | 27 ++++++++++--------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 82ba965a84f8..38b0e6e12fda 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2782,7 +2782,8 @@ storage_compatibility_mode: NONE # # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter # parameters: -# # Target number of partitions to include in a repair assignment to reduce excessive overstreaming. +# # Maximum number of partitions to include in a repair assignment. Used to reduce number of partitions +# # present in merkle tree leaf nodes to avoid overstreaming. # partitions_per_assignment: 1048576 # # Maximum number of tables to include in a repair assignment. This reduces the number of repairs, # # especially in keyspaces with many tables. The splitter avoids batching tables together if they diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 0b3a0d857a0a..baf44a2c0d0b 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2470,7 +2470,8 @@ storage_compatibility_mode: NONE # # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter # parameters: -# # Target number of partitions to include in a repair assignment to reduce excessive overstreaming. +# # Maximum number of partitions to include in a repair assignment. Used to reduce number of partitions +# # present in merkle tree leaf nodes to avoid overstreaming. 
# partitions_per_assignment: 1048576 # # Maximum number of tables to include in a repair assignment. This reduces the number of repairs, # # especially in keyspaces with many tables. The splitter avoids batching tables together if they diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc index 1eb54a595ccd..d9c3b60ec7ee 100644 --- a/doc/modules/cassandra/pages/auto-repair/overview.adoc +++ b/doc/modules/cassandra/pages/auto-repair/overview.adoc @@ -90,8 +90,8 @@ schedule in certain data centers. in nodetool repair. Defaults to true. General advice is to keep this true. | token_range_splitter | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. -| token_range_splitter.partitions_per_assignment | 1048576 | Target number of partitions to include in a repair -assignment to reduce excessive overstreaming. +| token_range_splitter.partitions_per_assignment | 1048576 | Maximum number of partitions to include in a repair +assignment. Used to reduce number of partitions present in merkle tree leaf nodes to avoid overstreaming. | token_range_splitter.max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment. This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables together if they exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. 
diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 850db0f66b6d..c7e5f4598879 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -37,6 +37,8 @@ import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; +import org.apache.cassandra.tcm.compatibility.TokenRingUtils; +import org.apache.cassandra.utils.FBUtilities; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,7 +46,6 @@ import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; import com.clearspring.analytics.stream.cardinality.ICardinality; import org.apache.cassandra.config.DataStorageSpec; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; @@ -109,8 +110,9 @@ * repaired. * *
    • - * partitions_per_assignment: The target number of partitions that should be included in a repair - * assignment. This configuration exists to reduce excessive overstreaming. + * partitions_per_assignment: The maximum number of partitions that should be included in a repair + * assignment. This configuration exists to reduce excessive overstreaming by attempting to limit the number + * of partitions present in a merkle tree leaf node. *
    • *
    • * max_tables_per_assignment: The maximum number of tables that can be included in a repair assignment. @@ -135,7 +137,7 @@ * repair schedule. *
        *
      • bytes_per_assignment: 200GiB
      • - *
      • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
      • + *
      • partitions_per_assignment: 1048576
      • *
      • max_tables_per_assignment: 64
      • *
      • max_bytes_per_schedule: 100TiB
      • *
      @@ -147,7 +149,7 @@ * more than this much data is written per min_repair_interval. *
        *
      • bytes_per_assignment: 50GiB
      • - *
      • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
      • + *
      • partitions_per_assignment: 1048576
      • *
      • max_tables_per_assignment: 64
      • *
      • max_bytes_per_schedule: 100GiB
      • *
      @@ -157,7 +159,7 @@ * with previews targeting at most 200GiB of data and 1048576 partitions. *
        *
      • bytes_per_assignment: 200GiB
      • - *
      • partitions_per_assignment: 2^repair_session_max_tree_depth (2^20 == 1048576 by default)
      • + *
      • partitions_per_assignment: 1048576
      • *
      • max_tables_per_assignment: 64
      • *
      • max_bytes_per_schedule: 100TiB
      • *
      @@ -204,7 +206,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of * data is written to a node per min_repair_interval. */ - private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap(AutoRepairConfig.RepairType.class) {{ + private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. @@ -317,7 +319,7 @@ List getRepairAssignmentsForKeyspace(AutoRepairConfig.Rep tableNames.sort((t1, t2) -> { ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); - // If for whatever reason the CFS is not retrievable, we can assume its been deleted, so give the + // If for whatever reason the CFS is not retrievable, we can assume it has been deleted, so give the // other cfs precedence. if (cfs1 == null) { @@ -636,7 +638,7 @@ private Collection> getTokenRanges(boolean primaryRangeOnly, String Collection> wrappedRanges; if (primaryRangeOnly) { - wrappedRanges = StorageService.instance.getPrimaryRanges(keyspaceName); + wrappedRanges = TokenRingUtils.getPrimaryRangesForEndpoint(keyspaceName, FBUtilities.getBroadcastAddressAndPort()); } else { @@ -687,8 +689,6 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai long sstableSize = reader.bytesOnDisk(); totalBytes += sstableSize; - // TODO since reading the index file anyway may be able to get more accurate ratio for partition count, - // still better to use the cardinality estimator then the index since it wont count duplicates. 
// get the bounds of the sstable for this range using the index file but do not actually read it. List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); @@ -734,7 +734,7 @@ static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repa if (cfs == null) { logSkippingTable(keyspaceName, tableName); - return Refs.ref(Collections.emptyList()); + return Refs.ref(Collections.emptyList()); } Refs refs = null; @@ -954,7 +954,8 @@ static class RepairTypeDefaultsBuilder { private final AutoRepairConfig.RepairType repairType; private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("200GiB"); - private long partitionsPerAssignment = (long) Math.pow(2, DatabaseDescriptor.getRepairSessionMaxTreeDepth()); + // Aims to target at most 1 partition per leaf assuming a merkle tree of depth 20 (2^20 = 1,048,576) + private long partitionsPerAssignment = 1_048_576; private int maxTablesPerAssignment = 64; private DataStorageSpec.LongBytesBound maxBytesPerSchedule = MAX_BYTES; From f8eb10ff33eb66e29eb159c13ceb4765b7a55081 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:48:50 -0600 Subject: [PATCH 126/257] Fix inconsistent spacing in javadoc --- .../autorepair/RepairTokenRangeSplitter.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index c7e5f4598879..05fda709eabf 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -93,9 +93,9 @@ *
    • * * To manage these issues, the strategy involves estimating the size and number of partitions within a range and - * splitting it accordingly to bound the size of the range splits. This is established by iterating over SSTable + * splitting it accordingly to bound the size of the range splits. This is established by iterating over SSTable * index files to estimate the amount of bytes and partitions involved in the ranges being repaired and by what - * repair type is beinb invoked. + * repair type is being invoked. *

      * While this splitter has a lot of tuning parameters, the expectation is that the established default configuration * shall be sensible for all {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}'s. The following @@ -103,28 +103,28 @@ *

        *
      • * bytes_per_assigment: The target and maximum amount of bytes that should be included in a repair - * assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this + * assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this * involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being - * repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction + * repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction * that is expected. For all other repair types, this involves the amount of data covering the range being * repaired. *
      • *
      • * partitions_per_assignment: The maximum number of partitions that should be included in a repair - * assignment. This configuration exists to reduce excessive overstreaming by attempting to limit the number + * assignment. This configuration exists to reduce excessive overstreaming by attempting to limit the number * of partitions present in a merkle tree leaf node. *
      • *
      • * max_tables_per_assignment: The maximum number of tables that can be included in a repair assignment. * This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for - * a keyspace. Note that the splitter will avoid batching tables together if they exceed the other + * a keyspace. Note that the splitter will avoid batching tables together if they exceed the other * configuration parameters such as bytes_per_assignment and partitions_per_assignment. *
      • *
      • - * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves - * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to + * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves + * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to * reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if - * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up + * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up * the min_repair_interval. *
      • *
      @@ -144,8 +144,8 @@ * *
    • * incremental: Configured in a way that attempts to repair 50GiB of data per repair, and 100GiB per - * schedule. This attempts to throttle the amount of IR and anticompaction done per schedule after turning - * incremental on for the first time. You may want to consider increasing max_bytes_per_schedule + * schedule. This attempts to throttle the amount of IR and anticompaction done per schedule after turning + * incremental on for the first time. You may want to consider increasing max_bytes_per_schedule * more than this much data is written per min_repair_interval. *
        *
      • bytes_per_assignment: 50GiB
      • @@ -201,7 +201,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter *
          max_bytes_per_schedule: 1000GiB
        * * It's expected that these defaults should work well for everything except incremental, so we confine - * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 100GiB. This should strike a good balance + * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 100GiB. This should strike a good balance * between the amount of data that will be repaired during an initial migration to incremental repair and should * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of * data is written to a node per min_repair_interval. @@ -288,7 +288,7 @@ private class BytesBasedRepairAssignmentIterator extends RepairAssignmentIterato protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) { // short circuit if we've accumulated too many bytes by returning a KeyspaceRepairAssignments with - // no assignments. We do this rather than returning false in hasNext() because we want to signal + // no assignments. We do this rather than returning false in hasNext() because we want to signal // to AutoRepair that a keyspace generated no assignments. if (bytesSoFar >= maxBytesPerSchedule.toBytes()) { @@ -815,7 +815,7 @@ protected static class SizeEstimate public final long sizeInRange; public final long totalSize; /** - * Size to consider in the repair. For incremental repair, we want to consider the total size + * Size to consider in the repair. For incremental repair, we want to consider the total size * of the estimate as we have to factor in anticompacting the entire SSTable. * For full repair, just use the size containing the range. */ @@ -884,7 +884,7 @@ public String getDescription() { } /** - * Estimated bytes involved in the assignment. Typically Derived from {@link SizeEstimate#sizeForRepair}. + * Estimated bytes involved in the assignment. Typically Derived from {@link SizeEstimate#sizeForRepair}. * @return estimated bytes involved in the assignment. 
*/ public long getEstimatedBytes() From 9a13bd8e59dac64b1b2ef1931038ac0c094d77f1 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 22 Jan 2025 22:56:16 -0600 Subject: [PATCH 127/257] Make AutoRepair's repair_by_keyspace default to true instead of false Creating repair sessions by table can create an overwhelming amount of repairs, especially with vnodes. If a repair assignment is too big (> 64 tables by default, or > 200GB for full/50GB for incremental) RepairTokenRangeSplitter will already split into multiple repair assignments. Adjusts repair_by_keyspace to default to true. patch by Andy Tolbert; reviewed by Jaydeep Chovatia for CASSANDRA-20232 --- conf/cassandra.yaml | 2 +- conf/cassandra_latest.yaml | 2 +- doc/modules/cassandra/pages/auto-repair/overview.adoc | 6 +++--- .../cassandra/repair/autorepair/AutoRepairConfig.java | 2 +- .../repair/autorepair/AutoRepairConfigTest.java | 11 +++++++---- .../autorepair/RepairTokenRangeSplitterTest.java | 1 - 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 38b0e6e12fda..5b6e0dc51fc5 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2747,7 +2747,7 @@ storage_compatibility_mode: NONE # global_settings: # # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired # # individually. -# repair_by_keyspace: false +# repair_by_keyspace: true # # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool # # repair. 
# number_of_repair_threads: 1 diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index baf44a2c0d0b..ed99e9eece19 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2435,7 +2435,7 @@ storage_compatibility_mode: NONE # global_settings: # # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired # # individually. -# repair_by_keyspace: false +# repair_by_keyspace: true # # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool # # repair. # number_of_repair_threads: 1 diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc index d9c3b60ec7ee..698e53c44677 100644 --- a/doc/modules/cassandra/pages/auto-repair/overview.adoc +++ b/doc/modules/cassandra/pages/auto-repair/overview.adoc @@ -64,7 +64,7 @@ The following settings can be tailored individually for each repair type. |=== | Name | Default | Description | enabled | false | Enable/Disable full/incremental/preview_repaired auto-repair -| repair_by_keyspace | false | If true, attempts to group tables in the same keyspace into one repair; otherwise, +| repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired individually. | number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool repair. 
@@ -146,7 +146,7 @@ repair scheduler configuration: configuration for repair_type: full enabled: true min_repair_interval: 24h - repair_by_keyspace: false + repair_by_keyspace: true number_of_repair_threads: 1 sstable_upper_threshold: 10000 table_max_repair_time: 6h @@ -166,7 +166,7 @@ configuration for repair_type: full configuration for repair_type: incremental enabled: true min_repair_interval: 1h - repair_by_keyspace: false + repair_by_keyspace: true number_of_repair_threads: 1 sstable_upper_threshold: 10000 table_max_repair_time: 6h diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index e32374e8dc55..f401af28e3c8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -426,7 +426,7 @@ protected static Options getDefaultOptions() Options opts = new Options(); opts.enabled = false; - opts.repair_by_keyspace = false; + opts.repair_by_keyspace = true; opts.number_of_repair_threads = 1; opts.parallel_repair_count = 3; opts.parallel_repair_percentage = 3; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index b95b8dd84b21..3bef45b8bd53 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -130,9 +130,12 @@ public void testSetAutoRepairEnabledNoMVOrCDC() @Test public void testSetRepairByKeyspace() { - config.setRepairByKeyspace(repairType, true); + // Should default to true. 
+ assertTrue(config.getRepairByKeyspace(repairType)); - assertTrue(config.getOptions(repairType).repair_by_keyspace); + config.setRepairByKeyspace(repairType, false); + + assertFalse(config.getOptions(repairType).repair_by_keyspace); } @Test @@ -418,7 +421,7 @@ public void testDefaultOptions() Map defaultOptions = Options.getDefaultOptionsMap(); Options options = defaultOptions.get(repairType); assertFalse(options.enabled); - assertFalse(options.repair_by_keyspace); + assertTrue(options.repair_by_keyspace); assertEquals(Integer.valueOf(1), options.number_of_repair_threads); assertEquals(Integer.valueOf(3), options.parallel_repair_count); assertEquals(Integer.valueOf(3), options.parallel_repair_percentage); @@ -447,7 +450,7 @@ public void testGlobalOptions() { AutoRepairConfig config = new AutoRepairConfig(); assertFalse(config.global_settings.enabled); - assertFalse(config.global_settings.repair_by_keyspace); + assertTrue(config.global_settings.repair_by_keyspace); assertEquals(Integer.valueOf(1), config.global_settings.number_of_repair_threads); assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_count); assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_percentage); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index fd79fc4dd556..daeb6466fe2e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -65,7 +65,6 @@ public static void setUpClass() { CQLTester.setUpClass(); AutoRepairService.setup(); - AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); 
DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(BigFormat.NAME)); } From b9381851b8001eb462887ba757f43f57f4b01953 Mon Sep 17 00:00:00 2001 From: Francisco Guerrero Date: Fri, 24 Jan 2025 16:46:40 -0800 Subject: [PATCH 128/257] AutoRepairServiceMBean should not use custom types for JMX methods Patch by Francisco Guerrero; reviewed by TBD for CASSANDRA-20185 --- .../cassandra/locator/InetAddressAndPort.java | 54 ++++++ .../repair/autorepair/AutoRepairConfig.java | 13 ++ .../FixedSplitTokenRangeSplitter.java | 2 +- .../cassandra/service/AutoRepairService.java | 160 +++++++++++++----- .../service/AutoRepairServiceMBean.java | 40 ++--- .../cassandra/service/StorageService.java | 17 +- .../service/StorageServiceMBean.java | 3 +- .../org/apache/cassandra/tools/NodeProbe.java | 53 +++--- .../tools/nodetool/AutoRepairStatus.java | 6 +- .../tools/nodetool/GetAutoRepairConfig.java | 83 +-------- .../tools/nodetool/SetAutoRepairConfig.java | 64 ++----- .../AutoRepairConfigRepairTypeTest.java | 61 +++++++ .../service/AutoRepairServiceBasicTest.java | 14 +- .../AutoRepairServiceRepairTypeTest.java | 2 +- .../service/AutoRepairServiceSetterTest.java | 14 +- .../tools/nodetool/AutoRepairStatusTest.java | 8 +- .../nodetool/SetAutoRepairConfigTest.java | 51 +++--- 17 files changed, 364 insertions(+), 281 deletions(-) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java diff --git a/src/java/org/apache/cassandra/locator/InetAddressAndPort.java b/src/java/org/apache/cassandra/locator/InetAddressAndPort.java index 50f3368b2001..d0272dfc60bd 100644 --- a/src/java/org/apache/cassandra/locator/InetAddressAndPort.java +++ b/src/java/org/apache/cassandra/locator/InetAddressAndPort.java @@ -25,6 +25,9 @@ import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; import 
java.util.regex.Pattern; import java.util.List; import java.util.stream.Collectors; @@ -32,8 +35,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; import com.google.common.net.HostAndPort; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -59,6 +66,7 @@ public final class InetAddressAndPort extends InetSocketAddress implements Comparable, Serializable { private static final long serialVersionUID = 0; + private static final Logger logger = LoggerFactory.getLogger(InetAddressAndPort.class); //Store these here to avoid requiring DatabaseDescriptor to be loaded. DatabaseDescriptor will set //these when it loads the config. A lot of unit tests won't end up loading DatabaseDescriptor. @@ -323,6 +331,52 @@ public static void initializeDefaultPort(int port) defaultPort = port; } + public static List stringify(Iterable endpoints) + { + return stringify(endpoints, true); + } + + public static List stringify(Iterable endpoints, boolean withPort) + { + List stringEndpoints = new ArrayList<>(); + for (InetAddressAndPort ep : endpoints) + { + stringEndpoints.add(ep.getHostAddress(withPort)); + } + return stringEndpoints; + } + + /** + * Parses a comma-separated list of hosts to a set of {@link InetAddressAndPort} + * + * @param value the comma-separated list of hosts to parse + * @param failOnError whether to fail when encountering an invalid hostname + * @return the set of parsed {@link InetAddressAndPort} + */ + public static Set parseHosts(String value, boolean failOnError) + { + Set hosts = new HashSet<>(); + for (String host : Splitter.on(',').split(value)) + { + try + { + hosts.add(InetAddressAndPort.getByName(host)); + } + catch (UnknownHostException e) + { + if (failOnError) + { + throw new 
IllegalArgumentException(e.getMessage()); + } + else + { + logger.warn("Invalid ip address {} from input={}", host, value); + } + } + } + return hosts; + } + static int getDefaultPort() { return defaultPort; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index f401af28e3c8..80381af73139 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -23,6 +23,7 @@ import java.util.EnumMap; import java.util.HashSet; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.function.Function; @@ -104,6 +105,18 @@ public static AutoRepairState getAutoRepairState(RepairType repairType) throw new IllegalArgumentException("Invalid repair type: " + repairType); } + + /** + * Case-insensitive parsing of the repair type string into {@link RepairType} + * + * @param repairTypeStr the repair type string + * @return the {@link RepairType} represented by the {@code repairTypeStr} string + * @throws IllegalArgumentException when the repair type string does not match any repair type + */ + public static RepairType parse(String repairTypeStr) + { + return RepairType.valueOf(LocalizeString.toUpperCaseLocalized(Objects.requireNonNull(repairTypeStr, "repairTypeStr cannot be null"))); + } } // repair_type_overrides overrides the global_settings for a specific repair type. 
String used as key instead diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 07f186b2e748..c5a1030c7a51 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -142,4 +142,4 @@ public Map getParameters() { return Collections.singletonMap(NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubranges)); } -} \ No newline at end of file +} diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 9e3b56d18a27..af850f6c2dbf 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; @@ -25,6 +26,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.utils.MBeanWrapper; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -32,6 +34,7 @@ import java.util.UUID; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; public class AutoRepairService implements AutoRepairServiceMBean { @@ -57,6 +60,11 @@ public static void setup() MBeanWrapper.instance.registerMBean(instance, MBEAN_NAME); } + public void checkCanRun(String repairType) + { + checkCanRun(RepairType.parse(repairType)); + } + public void checkCanRun(RepairType repairType) { if (!config.isAutoRepairSchedulingEnabled()) @@ 
-72,47 +80,71 @@ public void checkCanRun(RepairType repairType) throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled. Set cdc_on_repair_enabled to false."); } - @Override public AutoRepairConfig getAutoRepairConfig() { return config; } @Override - public void setAutoRepairEnabled(RepairType repairType, boolean enabled) + public boolean isAutoRepairDisabled() { - checkCanRun(repairType); - config.setAutoRepairEnabled(repairType, enabled); + return config == null || !config.isAutoRepairSchedulingEnabled(); + } + + @Override + public String autoRepairConfiguration() + { + StringBuilder sb = new StringBuilder(); + sb.append("repair scheduler configuration:"); + appendConfig(sb, "repair_check_interval", config.getRepairCheckInterval()); + appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries()); + appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff()); + appendConfig(sb, "repair_task_min_duration", config.getRepairTaskMinDuration()); + appendConfig(sb, "history_clear_delete_hosts_buffer_interval", config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); + for (RepairType repairType : RepairType.values()) + { + sb.append(formatRepairTypeConfig(repairType, config)); + } + return sb.toString(); } @Override - public void setRepairThreads(RepairType repairType, int repairThreads) + public void setAutoRepairEnabled(String repairType, boolean enabled) { - config.setRepairThreads(repairType, repairThreads); + checkCanRun(repairType); + config.setAutoRepairEnabled(RepairType.parse(repairType), enabled); } @Override - public void setRepairPriorityForHosts(RepairType repairType, Set hosts) + public void setRepairThreads(String repairType, int repairThreads) { - AutoRepairUtils.addPriorityHosts(repairType, hosts); + config.setRepairThreads(RepairType.parse(repairType), repairThreads); } @Override - public Set getRepairHostPriority(RepairType repairType) + public void setRepairPriorityForHosts(String 
repairType, String commaSeparatedHostSet) { - return AutoRepairUtils.getPriorityHosts(repairType); + Set hosts = InetAddressAndPort.parseHosts(commaSeparatedHostSet, false); + if (!hosts.isEmpty()) + { + AutoRepairUtils.addPriorityHosts(RepairType.parse(repairType), hosts); + } } @Override - public void setForceRepairForHosts(RepairType repairType, Set hosts) + public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet) { - AutoRepairUtils.setForceRepair(repairType, hosts); + Set hosts = InetAddressAndPort.parseHosts(commaSeparatedHostSet, false); + if (!hosts.isEmpty()) + { + AutoRepairUtils.setForceRepair(RepairType.parse(repairType), hosts); + } } @Override - public void setRepairMinInterval(RepairType repairType, String minRepairInterval) + public void setRepairMinInterval(String repairType, String minRepairInterval) { - config.setRepairMinInterval(repairType, minRepairInterval); + config.setRepairMinInterval(RepairType.parse(repairType), minRepairInterval); } @Override @@ -121,6 +153,7 @@ public void startScheduler() config.startScheduler(); } + @Override public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) { config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); @@ -145,61 +178,63 @@ public void setAutoRepairMinRepairTaskDuration(String duration) } @Override - public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) + public void setRepairSSTableCountHigherThreshold(String repairType, int sstableHigherThreshold) { - config.setRepairSSTableCountHigherThreshold(repairType, sstableHigherThreshold); + config.setRepairSSTableCountHigherThreshold(RepairType.parse(repairType), sstableHigherThreshold); } @Override - public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) + public void setAutoRepairTableMaxRepairTime(String repairType, String autoRepairTableMaxRepairTime) { - 
config.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); + config.setAutoRepairTableMaxRepairTime(RepairType.parse(repairType), autoRepairTableMaxRepairTime); } @Override - public void setIgnoreDCs(RepairType repairType, Set ignoreDCs) + public void setIgnoreDCs(String repairType, Set ignoreDCs) { - config.setIgnoreDCs(repairType, ignoreDCs); + config.setIgnoreDCs(RepairType.parse(repairType), ignoreDCs); } @Override - public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly) + public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) { - config.setRepairPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); + config.setRepairPrimaryTokenRangeOnly(RepairType.parse(repairType), primaryTokenRangeOnly); } @Override - public void setParallelRepairPercentage(RepairType repairType, int percentage) + public void setParallelRepairPercentage(String repairType, int percentage) { - config.setParallelRepairPercentage(repairType, percentage); + config.setParallelRepairPercentage(RepairType.parse(repairType), percentage); } @Override - public void setParallelRepairCount(RepairType repairType, int count) + public void setParallelRepairCount(String repairType, int count) { - config.setParallelRepairCount(repairType, count); + config.setParallelRepairCount(RepairType.parse(repairType), count); } - public void setMVRepairEnabled(RepairType repairType, boolean enabled) + @Override + public void setMVRepairEnabled(String repairType, boolean enabled) { - config.setMaterializedViewRepairEnabled(repairType, enabled); + config.setMaterializedViewRepairEnabled(RepairType.parse(repairType), enabled); } - public void setRepairSessionTimeout(RepairType repairType, String timeout) + @Override + public void setRepairSessionTimeout(String repairType, String timeout) { - config.setRepairSessionTimeout(repairType, timeout); + config.setRepairSessionTimeout(RepairType.parse(repairType), timeout); } @Override - 
public Set getOnGoingRepairHostIds(RepairType rType) + public Set getOnGoingRepairHostIds(String repairType) { - Set hostIds = new HashSet<>(); - List histories = AutoRepairUtils.getAutoRepairHistory(rType); + List histories = AutoRepairUtils.getAutoRepairHistory(RepairType.parse(repairType)); if (histories == null) { - return hostIds; + return Collections.emptySet(); } - AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(rType)); + Set hostIds = new HashSet<>(); + AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(RepairType.parse(repairType))); for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) { hostIds.add(id.toString()); @@ -208,18 +243,61 @@ public Set getOnGoingRepairHostIds(RepairType rType) { hostIds.add(id.toString()); } - return hostIds; + return Collections.unmodifiableSet(hostIds); } @Override - public Map getAutoRepairTokenRangeSplitterParameters(RepairType repairType) + public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value) { - return config.getTokenRangeSplitterInstance(repairType).getParameters(); + config.getTokenRangeSplitterInstance(RepairType.parse(repairType)).setParameter(key, value); } - @Override - public void setAutoRepairTokenRangeSplitterParameter(RepairType repairType, String key, String value) + private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig config) + { + StringBuilder sb = new StringBuilder(); + sb.append("\nconfiguration for repair_type: ").append(repairType.getConfigName()); + sb.append("\n\tenabled: ").append(config.isAutoRepairEnabled(repairType)); + // Only show configuration if enabled + if (config.isAutoRepairEnabled(repairType)) + { + Set priorityHosts = AutoRepairUtils.getPriorityHosts(repairType); + if (!priorityHosts.isEmpty()) + { + appendConfig(sb, 
"priority_hosts", Joiner.on(',').skipNulls().join(priorityHosts)); + } + + appendConfig(sb , "min_repair_interval", config.getRepairMinInterval(repairType)); + appendConfig(sb , "repair_by_keyspace", config.getRepairByKeyspace(repairType)); + appendConfig(sb , "number_of_repair_threads", config.getRepairThreads(repairType)); + appendConfig(sb , "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); + appendConfig(sb , "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); + appendConfig(sb , "ignore_dcs", config.getIgnoreDCs(repairType)); + appendConfig(sb , "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); + appendConfig(sb , "parallel_repair_count", config.getParallelRepairCount(repairType)); + appendConfig(sb , "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); + appendConfig(sb , "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); + appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); + appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); + appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); + + final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); + final String splitterClassName = splitterClass.class_name != null ? 
splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); + appendConfig(sb, "token_range_splitter", splitterClassName); + Map tokenRangeSplitterParameters = config.getTokenRangeSplitterInstance(repairType).getParameters(); + if (!tokenRangeSplitterParameters.isEmpty()) + { + for (Map.Entry param : tokenRangeSplitterParameters.entrySet()) + { + appendConfig(sb, String.format("token_range_splitter.%s", param.getKey()), param.getValue()); + } + } + } + + return sb.toString(); + } + + private void appendConfig(StringBuilder sb, String config, T value) { - config.getTokenRangeSplitterInstance(repairType).setParameter(key, value); + sb.append(String.format("%s%s: %s", "\n\t", config, value)); } } diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 14fe9feb7958..bad7667c6acc 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -17,11 +17,7 @@ */ package org.apache.cassandra.service; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; -import java.util.Map; import java.util.Set; public interface AutoRepairServiceMBean @@ -29,17 +25,15 @@ public interface AutoRepairServiceMBean /** * Enable or disable auto-repair for a given repair type */ - public void setAutoRepairEnabled(RepairType repairType, boolean enabled); + public void setAutoRepairEnabled(String repairType, boolean enabled); - public void setRepairThreads(RepairType repairType, int repairThreads); + public void setRepairThreads(String repairType, int repairThreads); - public void setRepairPriorityForHosts(RepairType repairType, Set host); + public void setRepairPriorityForHosts(String repairType, String commaSeparatedHostSet); - public void 
setForceRepairForHosts(RepairType repairType, Set host); + public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet); - public Set getRepairHostPriority(RepairType repairType); - - public void setRepairMinInterval(RepairType repairType, String minRepairInterval); + public void setRepairMinInterval(String repairType, String minRepairInterval); void startScheduler(); @@ -51,27 +45,27 @@ public interface AutoRepairServiceMBean public void setAutoRepairMinRepairTaskDuration(String duration); - public void setRepairSSTableCountHigherThreshold(RepairType repairType, int ssTableHigherThreshold); + public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold); - public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime); + public void setAutoRepairTableMaxRepairTime(String repairType, String autoRepairTableMaxRepairTime); - public void setIgnoreDCs(RepairType repairType, Set ignorDCs); + public void setIgnoreDCs(String repairType, Set ignorDCs); - public void setPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly); + public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly); - public void setParallelRepairPercentage(RepairType repairType, int percentage); + public void setParallelRepairPercentage(String repairType, int percentage); - public void setParallelRepairCount(RepairType repairType, int count); + public void setParallelRepairCount(String repairType, int count); - public void setMVRepairEnabled(RepairType repairType, boolean enabled); + public void setMVRepairEnabled(String repairType, boolean enabled); - public AutoRepairConfig getAutoRepairConfig(); + public boolean isAutoRepairDisabled(); - public void setRepairSessionTimeout(RepairType repairType, String timeout); + public String autoRepairConfiguration(); - public Set getOnGoingRepairHostIds(RepairType rType); + public void setRepairSessionTimeout(String 
repairType, String timeout); - public Map getAutoRepairTokenRangeSplitterParameters(RepairType repairType); + public Set getOnGoingRepairHostIds(String repairType); - public void setAutoRepairTokenRangeSplitterParameter(RepairType repairType, String key, String value); + public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value); } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index f1302752b653..741c2bed7fb3 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -253,6 +253,7 @@ import static org.apache.cassandra.index.SecondaryIndexManager.getIndexName; import static org.apache.cassandra.index.SecondaryIndexManager.isIndexColumnFamily; import static org.apache.cassandra.io.util.FileUtils.ONE_MIB; +import static org.apache.cassandra.locator.InetAddressAndPort.stringify; import static org.apache.cassandra.schema.SchemaConstants.isLocalSystemKeyspace; import static org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus; import static org.apache.cassandra.service.ActiveRepairService.repairCommandExecutor; @@ -2627,16 +2628,6 @@ public String getSavedCachesLocation() return FileUtils.getCanonicalPath(DatabaseDescriptor.getSavedCachesLocation()); } - private List stringify(Iterable endpoints, boolean withPort) - { - List stringEndpoints = new ArrayList<>(); - for (InetAddressAndPort ep : endpoints) - { - stringEndpoints.add(ep.getHostAddress(withPort)); - } - return stringEndpoints; - } - public int getCurrentGenerationNumber() { return Gossiper.instance.getCurrentGenerationNumber(getBroadcastAddressAndPort()); @@ -5671,19 +5662,21 @@ public void alterTopology(String changes) }); } + @Override public List getTablesForKeyspace(String keyspace) { return Keyspace.open(keyspace).getColumnFamilyStores().stream().map(cfs -> 
cfs.name).collect(Collectors.toList()); } - public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tableNames) throws InvalidRequestException + @Override + public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tableNames) { Map tables = Keyspace.open(keyspace).getColumnFamilyStores() .stream().collect(Collectors.toMap(c -> c.name, c -> c)); for (String tableName : tableNames) { if (!tables.containsKey(tableName)) - throw new InvalidRequestException("Table " + tableName + " does not exist in keyspace " + keyspace); + throw new RuntimeException("Table " + tableName + " does not exist in keyspace " + keyspace); } // only select SSTables that are unrepaired when repaired is true and vice versa diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index c361574deb04..e188595aa361 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -37,7 +37,6 @@ import org.apache.cassandra.db.ColumnFamilyStoreMBean; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.BreaksJMX; -import org.apache.cassandra.exceptions.InvalidRequestException; public interface StorageServiceMBean extends NotificationEmitter { @@ -1383,5 +1382,5 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e public List getTablesForKeyspace(String keyspace); /** Mutates the repaired state of all SSTables for the given SSTables */ - public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tables) throws InvalidRequestException; + public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tables); } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java 
b/src/java/org/apache/cassandra/tools/NodeProbe.java index e953b7a3d4ce..88344cb96b66 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -90,7 +90,6 @@ import org.apache.cassandra.db.compaction.CompactionManagerMBean; import org.apache.cassandra.db.virtual.CIDRFilteringMetricsTable; import org.apache.cassandra.db.virtual.CIDRFilteringMetricsTableMBean; -import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.fql.FullQueryLoggerOptions; import org.apache.cassandra.fql.FullQueryLoggerOptionsCompositeData; import org.apache.cassandra.gms.FailureDetector; @@ -106,14 +105,12 @@ import org.apache.cassandra.locator.EndpointSnitchInfoMBean; import org.apache.cassandra.locator.LocationInfoMBean; import org.apache.cassandra.metrics.CIDRAuthorizerMetrics; -import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.CassandraMetricsRegistry; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.metrics.TableMetrics; import org.apache.cassandra.metrics.ThreadPoolMetrics; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.MessagingServiceMBean; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.ActiveRepairServiceMBean; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.AutoRepairServiceMBean; @@ -2564,46 +2561,42 @@ public void abortBootstrap(String nodeId, String endpoint) ssProxy.abortBootstrap(nodeId, endpoint); } - public AutoRepairConfig getAutoRepairConfig() + public boolean isAutoRepairDisabled() { - return autoRepairProxy.getAutoRepairConfig(); + return autoRepairProxy.isAutoRepairDisabled(); } - public Map getAutoRepairTokenRangeSplitterParameters(AutoRepairConfig.RepairType repairType) + public String autoRepairConfiguration() { - return 
autoRepairProxy.getAutoRepairTokenRangeSplitterParameters(repairType); + return autoRepairProxy.autoRepairConfiguration(); } - public void setAutoRepairTokenRangeSplitterParameter(AutoRepairConfig.RepairType repairType, String key, String value) + public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value) { autoRepairProxy.setAutoRepairTokenRangeSplitterParameter(repairType, key, value); } - public void setAutoRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + public void setAutoRepairEnabled(String repairType, boolean enabled) { autoRepairProxy.setAutoRepairEnabled(repairType, enabled); } - public void setRepairThreads(AutoRepairConfig.RepairType repairType, int repairThreads) + public void setRepairThreads(String repairType, int repairThreads) { autoRepairProxy.setRepairThreads(repairType, repairThreads); } - public void setRepairPriorityForHosts(AutoRepairConfig.RepairType repairType, Set hosts) + public void setRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) { - autoRepairProxy.setRepairPriorityForHosts(repairType, hosts); + autoRepairProxy.setRepairPriorityForHosts(repairType, commaSeparatedHostSet); } - public Set getRepairPriorityForHosts(AutoRepairConfig.RepairType repairType) + public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet) { - return autoRepairProxy.getRepairHostPriority(repairType); + autoRepairProxy.setForceRepairForHosts(repairType, commaSeparatedHostSet); } - public void setForceRepairForHosts(AutoRepairConfig.RepairType repairType, Set hosts){ - autoRepairProxy.setForceRepairForHosts(repairType, hosts); - } - - public void setRepairMinInterval(AutoRepairConfig.RepairType repairType, String minRepairInterval) + public void setRepairMinInterval(String repairType, String minRepairInterval) { autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); } @@ -2633,42 +2626,42 @@ public void 
setAutoRepairMinRepairTaskDuration(String duration) autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); } - public void setRepairSSTableCountHigherThreshold(AutoRepairConfig.RepairType repairType, int ssTableHigherThreshold) + public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold) { autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); } - public void setAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType repairType, String autoRepairTableMaxRepairTime) + public void setAutoRepairTableMaxRepairTime(String repairType, String autoRepairTableMaxRepairTime) { autoRepairProxy.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); } - public void setAutoRepairIgnoreDCs(AutoRepairConfig.RepairType repairType, Set ignoreDCs) + public void setAutoRepairIgnoreDCs(String repairType, Set ignoreDCs) { autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); } - public void setParallelRepairPercentage(AutoRepairConfig.RepairType repairType, int percentage) + public void setParallelRepairPercentage(String repairType, int percentage) { autoRepairProxy.setParallelRepairPercentage(repairType, percentage); } - public void setParallelRepairCount(AutoRepairConfig.RepairType repairType, int count) + public void setParallelRepairCount(String repairType, int count) { autoRepairProxy.setParallelRepairCount(repairType, count); } - public void setPrimaryTokenRangeOnly(AutoRepairConfig.RepairType repairType, boolean primaryTokenRangeOnly) + public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) { autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); } - public void setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType repairType, boolean enabled) + public void setMaterializedViewRepairEnabled(String repairType, boolean enabled) { autoRepairProxy.setMVRepairEnabled(repairType, enabled); } - public List 
mutateSSTableRepairedState(boolean repair, boolean preview, String keyspace, List tables) throws InvalidRequestException + public List mutateSSTableRepairedState(boolean repair, boolean preview, String keyspace, List tables) { return ssProxy.mutateSSTableRepairedState(repair, preview, keyspace, tables); } @@ -2678,14 +2671,14 @@ public List getTablesForKeyspace(String keyspace) return ssProxy.getTablesForKeyspace(keyspace); } - public void setRepairSessionTimeout(AutoRepairConfig.RepairType repairType, String timeout) + public void setRepairSessionTimeout(String repairType, String timeout) { autoRepairProxy.setRepairSessionTimeout(repairType, timeout); } - public Set getOnGoingRepairHostIds(AutoRepairConfig.RepairType type) + public Set getOnGoingRepairHostIds(String repairType) { - return autoRepairProxy.getOnGoingRepairHostIds(type); + return autoRepairProxy.getOnGoingRepairHostIds(repairType); } } diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java index 7b96102c6698..22f9afc43fe1 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -25,7 +25,6 @@ import io.airlift.airline.Command; import io.airlift.airline.Option; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool; import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; @@ -37,7 +36,7 @@ public class AutoRepairStatus extends NodeTool.NodeToolCmd { @VisibleForTesting @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") - protected AutoRepairConfig.RepairType repairType; + protected String repairType; @Override public void execute(NodeProbe probe) @@ -45,8 +44,7 @@ public void execute(NodeProbe probe) checkArgument(repairType != null, "--repair-type is required."); 
PrintStream out = probe.output().out; - AutoRepairConfig config = probe.getAutoRepairConfig(); - if (config == null || !config.isAutoRepairSchedulingEnabled()) + if (probe.isAutoRepairDisabled()) { out.println("Auto-repair is not enabled"); return; diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index de234893cb49..961fb71fb4e4 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -17,20 +17,14 @@ */ package org.apache.cassandra.tools.nodetool; +import java.io.PrintStream; + import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; + import io.airlift.airline.Command; -import org.apache.cassandra.config.ParameterizedClass; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; -import java.io.PrintStream; -import java.util.Map; -import java.util.Set; - @Command(name = "getautorepairconfig", description = "Print autorepair configurations") public class GetAutoRepairConfig extends NodeToolCmd { @@ -40,74 +34,9 @@ public class GetAutoRepairConfig extends NodeToolCmd @Override public void execute(NodeProbe probe) { - AutoRepairConfig config = probe.getAutoRepairConfig(); - if (config == null || !config.isAutoRepairSchedulingEnabled()) - { + if (probe.isAutoRepairDisabled()) out.println("Auto-repair is not enabled"); - return; - } - - StringBuilder sb = new StringBuilder(); - sb.append("repair scheduler configuration:"); - appendConfig(sb, "repair_check_interval", config.getRepairCheckInterval()); - appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries()); - 
appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff()); - appendConfig(sb, "repair_task_min_duration", config.getRepairTaskMinDuration()); - appendConfig(sb, "history_clear_delete_hosts_buffer_interval", config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); - for (RepairType repairType : RepairType.values()) - { - sb.append(formatRepairTypeConfig(probe, repairType, config)); - } - - out.println(sb); - } - - private String formatRepairTypeConfig(NodeProbe probe, RepairType repairType, AutoRepairConfig config) - { - StringBuilder sb = new StringBuilder(); - sb.append("\nconfiguration for repair_type: ").append(repairType.getConfigName()); - sb.append("\n\tenabled: ").append(config.isAutoRepairEnabled(repairType)); - // Only show configuration if enabled - if (config.isAutoRepairEnabled(repairType)) - { - Set priorityHosts = probe.getRepairPriorityForHosts(repairType); - if (!priorityHosts.isEmpty()) - { - appendConfig(sb, "priority_hosts", Joiner.on(',').skipNulls().join(priorityHosts)); - } - - appendConfig(sb , "min_repair_interval", config.getRepairMinInterval(repairType)); - appendConfig(sb , "repair_by_keyspace", config.getRepairByKeyspace(repairType)); - appendConfig(sb , "number_of_repair_threads", config.getRepairThreads(repairType)); - appendConfig(sb , "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); - appendConfig(sb , "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); - appendConfig(sb , "ignore_dcs", config.getIgnoreDCs(repairType)); - appendConfig(sb , "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); - appendConfig(sb , "parallel_repair_count", config.getParallelRepairCount(repairType)); - appendConfig(sb , "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); - appendConfig(sb , "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); - appendConfig(sb , 
"initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); - appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); - appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); - - final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); - final String splitterClassName = splitterClass.class_name != null ? splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); - appendConfig(sb, "token_range_splitter", splitterClassName); - Map tokenRangeSplitterParameters = probe.getAutoRepairTokenRangeSplitterParameters(repairType); - if (!tokenRangeSplitterParameters.isEmpty()) - { - for (Map.Entry param : tokenRangeSplitterParameters.entrySet()) - { - appendConfig(sb, String.format("token_range_splitter.%s", param.getKey()), param.getValue()); - } - } - } - - return sb.toString(); - } - - private void appendConfig(StringBuilder sb, String config, T value) - { - sb.append(String.format("%s%s: %s", "\n\t", config, value)); + else + out.println(probe.autoRepairConfiguration()); } } diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 52704dfcd8f2..d55685e72426 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -23,17 +23,14 @@ import io.airlift.airline.Arguments; import io.airlift.airline.Command; import io.airlift.airline.Option; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; -import org.apache.cassandra.utils.LocalizeString; import java.io.PrintStream; -import java.net.UnknownHostException; import java.util.ArrayList; import java.util.HashSet; import 
java.util.List; +import java.util.Objects; import java.util.Set; import static com.google.common.base.Preconditions.checkArgument; @@ -68,7 +65,7 @@ public void execute(NodeProbe probe) String paramType = args.get(0); String paramVal = args.get(1); - if (!probe.getAutoRepairConfig().isAutoRepairSchedulingEnabled() && !paramType.equalsIgnoreCase("start_scheduler")) + if (probe.isAutoRepairDisabled() && !paramType.equalsIgnoreCase("start_scheduler")) { out.println("Auto-repair is not enabled"); return; @@ -101,47 +98,40 @@ public void execute(NodeProbe probe) } // options below require --repair-type option - checkArgument(repairTypeStr != null, "--repair-type is required for this parameter."); - final RepairType repairType = RepairType.valueOf(LocalizeString.toUpperCaseLocalized(repairTypeStr)); + Objects.requireNonNull(repairTypeStr, "--repair-type is required for this parameter."); if(paramType.startsWith(TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX)) { final String key = paramType.replace(TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX, ""); - probe.setAutoRepairTokenRangeSplitterParameter(repairType, key, paramVal); + probe.setAutoRepairTokenRangeSplitterParameter(repairTypeStr, key, paramVal); return; } - Set hosts; switch (paramType) { case "enabled": - probe.setAutoRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + probe.setAutoRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "number_of_repair_threads": - probe.setRepairThreads(repairType, Integer.parseInt(paramVal)); + probe.setRepairThreads(repairTypeStr, Integer.parseInt(paramVal)); break; case "min_repair_interval": - probe.setRepairMinInterval(repairType, paramVal); + probe.setRepairMinInterval(repairTypeStr, paramVal); break; case "sstable_upper_threshold": - probe.setRepairSSTableCountHigherThreshold(repairType, Integer.parseInt(paramVal)); + probe.setRepairSSTableCountHigherThreshold(repairTypeStr, Integer.parseInt(paramVal)); break; case "table_max_repair_time": - 
probe.setAutoRepairTableMaxRepairTime(repairType, paramVal); + probe.setAutoRepairTableMaxRepairTime(repairTypeStr, paramVal); break; case "priority_hosts": - hosts = retrieveHosts(paramVal); - if (!hosts.isEmpty()) + if (paramVal!= null && !paramVal.isEmpty()) { - probe.setRepairPriorityForHosts(repairType, hosts); + probe.setRepairPriorityForHosts(repairTypeStr, paramVal); } break; case "forcerepair_hosts": - hosts = retrieveHosts(paramVal); - if (!hosts.isEmpty()) - { - probe.setForceRepairForHosts(repairType, hosts); - } + probe.setForceRepairForHosts(repairTypeStr, paramVal); break; case "ignore_dcs": Set ignoreDCs = new HashSet<>(); @@ -149,43 +139,25 @@ public void execute(NodeProbe probe) { ignoreDCs.add(dc); } - probe.setAutoRepairIgnoreDCs(repairType, ignoreDCs); + probe.setAutoRepairIgnoreDCs(repairTypeStr, ignoreDCs); break; case "repair_primary_token_range_only": - probe.setPrimaryTokenRangeOnly(repairType, Boolean.parseBoolean(paramVal)); + probe.setPrimaryTokenRangeOnly(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "parallel_repair_count": - probe.setParallelRepairCount(repairType, Integer.parseInt(paramVal)); + probe.setParallelRepairCount(repairTypeStr, Integer.parseInt(paramVal)); break; case "parallel_repair_percentage": - probe.setParallelRepairPercentage(repairType, Integer.parseInt(paramVal)); + probe.setParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); break; case "materialized_view_repair_enabled": - probe.setMaterializedViewRepairEnabled(repairType, Boolean.parseBoolean(paramVal)); + probe.setMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "repair_session_timeout": - probe.setRepairSessionTimeout(repairType, paramVal); + probe.setRepairSessionTimeout(repairTypeStr, paramVal); break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); } } - - private Set retrieveHosts(String paramVal) - { - Set hosts = new HashSet<>(); - for (String host : 
Splitter.on(',').split(paramVal)) - { - try - { - hosts.add(InetAddressAndPort.getByName(host)); - } - catch (UnknownHostException e) - { - out.println("invalid ip address: " + host); - } - } - - return hosts; - } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java new file mode 100644 index 000000000000..be14b8b912ea --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + + +import org.junit.Assert; +import org.junit.Test; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType} + */ +public class AutoRepairConfigRepairTypeTest +{ + @Test + public void testRepairTypeParsing() + { + Assert.assertEquals(AutoRepairConfig.RepairType.FULL, AutoRepairConfig.RepairType.parse("FULL")); + Assert.assertEquals(AutoRepairConfig.RepairType.FULL, AutoRepairConfig.RepairType.parse("FuLl")); + Assert.assertEquals(AutoRepairConfig.RepairType.FULL, AutoRepairConfig.RepairType.parse("full")); + Assert.assertEquals(AutoRepairConfig.RepairType.INCREMENTAL, AutoRepairConfig.RepairType.parse("INCREMENTAL")); + Assert.assertEquals(AutoRepairConfig.RepairType.INCREMENTAL, AutoRepairConfig.RepairType.parse("incremental")); + Assert.assertEquals(AutoRepairConfig.RepairType.INCREMENTAL, AutoRepairConfig.RepairType.parse("inCRemenTal")); + Assert.assertEquals(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, AutoRepairConfig.RepairType.parse("PREVIEW_REPAIRED")); + Assert.assertEquals(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, AutoRepairConfig.RepairType.parse("preview_repaired")); + Assert.assertEquals(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, AutoRepairConfig.RepairType.parse("Preview_Repaired")); + } + + @Test(expected = NullPointerException.class) + public void testNullRepairTypeParsing() + { + AutoRepairConfig.RepairType.parse(null); + } + + @Test(expected = IllegalArgumentException.class) + public void testEmptyRepairTypeParsing() + { + AutoRepairConfig.RepairType.parse(""); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidRepairTypeParsing() + { + AutoRepairConfig.RepairType.parse("very_FULL"); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 35ceb90a3f1a..358a9af78f86 100644 --- 
a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -84,7 +84,7 @@ public void testsetAutoRepairRetryBackoffInSec() { public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { autoRepairService.config = new AutoRepairConfig(false); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test @@ -92,7 +92,7 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisa autoRepairService.config = new AutoRepairConfig(true); autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test(expected = ConfigurationException.class) @@ -100,7 +100,7 @@ public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { autoRepairService.config = new AutoRepairConfig(true); autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test @@ -108,7 +108,7 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + 
autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test @@ -116,7 +116,7 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(false); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test(expected = ConfigurationException.class) @@ -124,13 +124,13 @@ public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCEnabled(true); - autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index 7c8645149adc..fccac4762cb3 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -72,7 +72,7 @@ public void testGetOnGoingRepairHostIdsTest() { AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); - Set hosts = 
instance.getOnGoingRepairHostIds(repairType); + Set hosts = instance.getOnGoingRepairHostIds(repairType.name()); assertEquals(ImmutableSet.of(host1.toString(), host2.toString()), hosts); } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 2ac939f4ab16..d57e625fa9bf 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -51,13 +51,13 @@ public class AutoRepairServiceSetterTest extends CQLTester { private static final AutoRepairConfig config = new AutoRepairConfig(true); @Parameterized.Parameter - public AutoRepairConfig.RepairType repairType; + public AutoRepairConfig.RepairType repairTypeStr; @Parameterized.Parameter(1) public T arg; @Parameterized.Parameter(2) - public BiConsumer setter; + public BiConsumer setter; @Parameterized.Parameter(3) public Function getter; @@ -74,8 +74,8 @@ public static Collection testCases() { forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setRepairPriorityForHosts, AutoRepairUtils::getPriorityHosts), - forEachRepairType(ImmutableSet.of(InetAddressAndPort.getLocalHost()), AutoRepairService.instance::setForceRepairForHosts, AutoRepairServiceSetterTest::isLocalHostForceRepair) + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setRepairPriorityForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairUtils::getPriorityHosts), + 
forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setForceRepairForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairServiceSetterTest::isLocalHostForceRepair) ).flatMap(Function.identity()).collect(Collectors.toList()); } @@ -91,7 +91,7 @@ private static Set isLocalHostForceRepair(AutoRepairConfig.R return ImmutableSet.of(); } - private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { testCases[repairType.ordinal()] = new Object[]{repairType, arg, setter, getter}; @@ -125,7 +125,7 @@ public void prepare() { public void testSettersTest() { DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); - setter.accept(repairType, arg); - assertEquals(arg, getter.apply(repairType)); + setter.accept(repairTypeStr.name(), arg); + assertEquals(arg, getter.apply(repairTypeStr)); } } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index f1faabc0fc03..4e5f86e7233d 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -72,7 +72,7 @@ public void setUp() throws Exception setAutoRepairEnabled(true); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.FULL, true); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); - when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); +// 
when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); } @Test(expected = IllegalArgumentException.class) @@ -85,7 +85,7 @@ public void testExecuteWithoutRepairType() @Test public void testExecuteWithNoNodes() { - cmd.repairType = repairType; + cmd.repairType = repairType.name(); cmd.execute(probe); assertEquals("Active Repairs\n" + @@ -95,8 +95,8 @@ public void testExecuteWithNoNodes() @Test public void testExecute() { - when(probe.getOnGoingRepairHostIds(repairType)).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); - cmd.repairType = repairType; + when(probe.getOnGoingRepairHostIds(repairType.name())).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); + cmd.repairType = repairType.name(); cmd.execute(probe); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 1207186fb6b8..308e08589467 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -50,14 +50,11 @@ SetAutoRepairConfigTest.RepairTypeAndArgsParamsTests.class }) public class SetAutoRepairConfigTest { - protected static AutoRepairConfig config; - protected static SetAutoRepairConfig cmd; public static void before(NodeProbe probeMock, PrintStream outMock) { - config = new AutoRepairConfig(true); - when(probeMock.getAutoRepairConfig()).thenReturn(config); + when(probeMock.isAutoRepairDisabled()).thenReturn(false); cmd = new SetAutoRepairConfig(); cmd.out = outMock; } @@ -87,7 +84,7 @@ public void testHistoryDeleteHostsClearBufferInSec() verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); // test scenario when auto repair is disabled - when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + when(probe.isAutoRepairDisabled()).thenReturn(true); cmd.execute(probe); @@ 
-182,26 +179,26 @@ public void testNoArgs() @Test public void testRepairSchedulingDisabled() { - when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + when(probe.isAutoRepairDisabled()).thenReturn(true); cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("threads", "1"); cmd.execute(probe); verify(out, times(1)).println("Auto-repair is not enabled"); - verify(probe, times(0)).setRepairThreads(repairType, 1); + verify(probe, times(0)).setRepairThreads(repairType.name(), 1); } @Test public void testRepairTypeDisabled() { - config.setAutoRepairEnabled(repairType, false); +// config.setAutoRepairEnabled(repairType, false); cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("number_of_repair_threads", "1"); cmd.execute(probe); - verify(probe, times(1)).setRepairThreads(repairType, 1); + verify(probe, times(1)).setRepairThreads(repairType.name(), 1); } @@ -222,7 +219,7 @@ public void testV2FlagMissing() // expected } - verify(probe, times(0)).setRepairThreads(repairType, 0); + verify(probe, times(0)).setRepairThreads(repairType.name(), 0); } @Test(expected = IllegalArgumentException.class) @@ -237,23 +234,25 @@ public void testInvalidParamType() @Test public void testPriorityHosts() { + String commaSeparatedHostSet = String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1)); cmd.repairTypeStr = repairType.name(); - cmd.args = ImmutableList.of("priority_hosts", String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1))); + cmd.args = ImmutableList.of("priority_hosts", commaSeparatedHostSet); cmd.execute(probe); - verify(probe, times(1)).setRepairPriorityForHosts(repairType, ImmutableSet.of(localEndpoint, otherEndpoint)); + verify(probe, times(1)).setRepairPriorityForHosts(repairType.name(), commaSeparatedHostSet); } @Test public void testForceRepairHosts() { + String commaSeparatedHostSet = String.join(",", localEndpoint.toString().substring(1), 
otherEndpoint.toString().substring(1)); cmd.repairTypeStr = repairType.name(); - cmd.args = ImmutableList.of("forcerepair_hosts", String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1))); + cmd.args = ImmutableList.of("forcerepair_hosts", commaSeparatedHostSet); cmd.execute(probe); - verify(probe, times(1)).setForceRepairForHosts(repairType, ImmutableSet.of(localEndpoint, otherEndpoint)); + verify(probe, times(1)).setForceRepairForHosts(repairType.name(), commaSeparatedHostSet); } } @@ -276,17 +275,17 @@ public static class RepairTypeAndArgsParamsTests public static Collection testCases() { return Stream.of( - forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type, true)), - forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type, 1)), - forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type, "3h")), - forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type, 4)), - forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type, "5s")), - forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type, true)), - forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type, 6)), - forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type, 7)), - forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setMaterializedViewRepairEnabled(type, true)), - forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type, ImmutableSet.of("dc1", "dc2"))), - forEachRepairType("token_range_splitter.max_bytes_per_schedule", 
"500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type, "max_bytes_per_schedule", "500GiB")) + forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type.name(), true)), + forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type.name(), 1)), + forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type.name(), "3h")), + forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type.name(), 4)), + forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type.name(), "5s")), + forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type.name(), true)), + forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type.name(), 6)), + forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type.name(), 7)), + forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setMaterializedViewRepairEnabled(type.name(), true)), + forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type.name(), ImmutableSet.of("dc1", "dc2"))), + forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")) ).flatMap(Function.identity()).collect(Collectors.toList()); } @@ -325,7 +324,7 @@ public void test() verifyFunc.accept(repairType); // test scenario when auto repair is disabled - when(probe.getAutoRepairConfig()).thenReturn(new AutoRepairConfig(false)); + when(probe.isAutoRepairDisabled()).thenReturn(true); 
cmd.execute(probe); From 6a6d51d7bb0146194af822e1bf0f45325d31142f Mon Sep 17 00:00:00 2001 From: Francisco Guerrero Date: Sat, 25 Jan 2025 09:01:42 -0800 Subject: [PATCH 129/257] Address comments --- src/java/org/apache/cassandra/locator/InetAddressAndPort.java | 2 +- .../apache/cassandra/tools/nodetool/AutoRepairStatusTest.java | 1 - .../cassandra/tools/nodetool/SetAutoRepairConfigTest.java | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/locator/InetAddressAndPort.java b/src/java/org/apache/cassandra/locator/InetAddressAndPort.java index d0272dfc60bd..e2520659b810 100644 --- a/src/java/org/apache/cassandra/locator/InetAddressAndPort.java +++ b/src/java/org/apache/cassandra/locator/InetAddressAndPort.java @@ -366,7 +366,7 @@ public static Set parseHosts(String value, boolean failOnErr { if (failOnError) { - throw new IllegalArgumentException(e.getMessage()); + throw new IllegalArgumentException("Failed to parse host: " + host, e); } else { diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index 4e5f86e7233d..1c7b04d50bcb 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -72,7 +72,6 @@ public void setUp() throws Exception setAutoRepairEnabled(true); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.FULL, true); DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); -// when(probe.getAutoRepairConfig()).thenReturn(DatabaseDescriptor.getAutoRepairConfig()); } @Test(expected = IllegalArgumentException.class) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 308e08589467..335750a2f2b3 
100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -192,7 +192,6 @@ public void testRepairSchedulingDisabled() @Test public void testRepairTypeDisabled() { -// config.setAutoRepairEnabled(repairType, false); cmd.repairTypeStr = repairType.name(); cmd.args = ImmutableList.of("number_of_repair_threads", "1"); From f51f29be3f8b04d4cc92de81f6dddc635e6e9cc3 Mon Sep 17 00:00:00 2001 From: Francisco Guerrero Date: Mon, 27 Jan 2025 19:48:50 -0800 Subject: [PATCH 130/257] Fix tests and rename autoRepairConfiguration -> getAutoRepairConfiguration in the AutoRepairServiceMBean definition --- .../cassandra/service/AutoRepairService.java | 2 +- .../cassandra/service/AutoRepairServiceMBean.java | 2 +- .../org/apache/cassandra/tools/NodeProbe.java | 2 +- .../repair/autorepair/SSTableRepairedAtTest.java | 5 +++-- .../service/AutoRepairServiceSetterTest.java | 15 ++++++++++++--- 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index af850f6c2dbf..1f7004c31d48 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -92,7 +92,7 @@ public boolean isAutoRepairDisabled() } @Override - public String autoRepairConfiguration() + public String getAutoRepairConfiguration() { StringBuilder sb = new StringBuilder(); sb.append("repair scheduler configuration:"); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index bad7667c6acc..be6cedba9329 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -61,7 +61,7 @@ public interface AutoRepairServiceMBean public 
boolean isAutoRepairDisabled(); - public String autoRepairConfiguration(); + public String getAutoRepairConfiguration(); public void setRepairSessionTimeout(String repairType, String timeout); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 88344cb96b66..872aab37392f 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2568,7 +2568,7 @@ public boolean isAutoRepairDisabled() public String autoRepairConfiguration() { - return autoRepairProxy.autoRepairConfiguration(); + return autoRepairProxy.getAutoRepairConfiguration(); } public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index bd14eea805b3..f1de19b6723d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -104,11 +104,12 @@ public void testMutateSSTableRepairedStateTableNotFound() { try { - StorageService.instance.mutateSSTableRepairedState(true, false, TEST_KEYSPACE, Arrays.asList("MISSING_TABLE")); + StorageService.instance.mutateSSTableRepairedState(true, false, TEST_KEYSPACE, List.of("MISSING_TABLE")); fail("Expected an InvalidRequestException to be thrown"); } - catch (InvalidRequestException e) + catch (RuntimeException e) { + assertEquals("Table MISSING_TABLE does not exist in keyspace " + TEST_KEYSPACE, e.getMessage()); // Test passed } } diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index d57e625fa9bf..2f00b38d8190 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ 
b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -44,7 +44,7 @@ import java.util.stream.Stream; import static org.apache.cassandra.Util.setAutoRepairEnabled; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; @RunWith(Parameterized.class) public class AutoRepairServiceSetterTest extends CQLTester { @@ -122,10 +122,19 @@ public void prepare() { } @Test - public void testSettersTest() { + public void testSettersTest() + { DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.setCDCOnRepairEnabled(false); setter.accept(repairTypeStr.name(), arg); - assertEquals(arg, getter.apply(repairTypeStr)); + T actualConfig = getter.apply(repairTypeStr); + if (actualConfig instanceof Set) + // When performing a setRepairPriorityForHosts or setForceRepairForHosts, a comma-separated list of + // ip addresses is provided as input. The configuration is expected to return a Set of Strings that + // represent the configured IP addresses. This especial handling allows verification of this special + // case where one of the entries in the Set must match the configured input. 
+ assertThat(actualConfig).satisfiesAnyOf(entry -> assertThat(entry.toString()).contains(arg.toString())); + else + assertThat(actualConfig).isEqualTo(arg); } } From 7f1b4a2f296cd8625086523c25d885d8b78b63c0 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 28 Jan 2025 18:32:14 -0800 Subject: [PATCH 131/257] Adjust based on the latest trunk --- .../repair/autorepair/AutoRepairState.java | 7 +++++-- .../repair/autorepair/AutoRepairUtils.java | 2 +- .../autorepair/RepairTokenRangeSplitter.java | 14 ++++++++------ .../config/YamlConfigurationLoaderTest.java | 2 +- .../repair/autorepair/AutoRepairUtilsTest.java | 15 --------------- .../repair/autorepair/SSTableRepairedAtTest.java | 1 - 6 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index e5e848c9d50b..c8f90908b2e8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -312,7 +312,7 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, - !ranges.isEmpty(), false, false, PreviewKind.REPAIRED, false, true, false, false); + !ranges.isEmpty(), false, false, PreviewKind.REPAIRED, false, true, false, false, false); option.getColumnFamilies().addAll(tables); @@ -332,7 +332,11 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, true, false, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, +<<<<<<< HEAD false, false, PreviewKind.NONE, true, true, false, false, false, false); +======= + !ranges.isEmpty(), false, 
false, PreviewKind.NONE, true, true, false, false, false); +>>>>>>> ed19d4c2b1 (Adjust based on the latest trunk) option.getColumnFamilies().addAll(filterOutUnsafeTables(keyspace, tables)); @@ -379,7 +383,6 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); - option.getColumnFamilies().addAll(tables); return getRepairRunnable(keyspace, option); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index a35542186561..e35f491366ed 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -797,7 +797,7 @@ public static boolean shouldConsiderKeyspace(Keyspace ks) if (replicationStrategy instanceof NetworkTopologyStrategy) { Set datacenters = ((NetworkTopologyStrategy) replicationStrategy).getDatacenters(); - String localDC = DatabaseDescriptor.getLocator().location(FBUtilities.getBroadcastAddressAndPort()).datacenter; + String localDC = DatabaseDescriptor.getLocator().local().datacenter; if (!datacenters.contains(localDC)) { repair = false; diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 05fda709eabf..7f12020f3020 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -37,6 +37,11 @@ import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.db.DataRange; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.io.sstable.SSTableReadsListener; +import 
org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.utils.FBUtilities; import org.slf4j.Logger; @@ -54,7 +59,6 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigTableScanner; import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; @@ -692,12 +696,11 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai // get the bounds of the sstable for this range using the index file but do not actually read it. List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - ISSTableScanner rangeScanner = reader.getScanner(Collections.singleton(tokenRange)); - // Type check scanner returned as it may be an EmptySSTableScanner if the range is not covered in the + // Type check bounds to check if the range is not covered in the // SSTable, in this case we will avoid incrementing approxBytesInRange. 
- if (rangeScanner instanceof BigTableScanner) + if (!bounds.isEmpty() && reader instanceof BigTableReader) { - try (BigTableScanner scanner = (BigTableScanner) rangeScanner) + try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, ColumnFilter.all(reader.metadata()), DataRange.forTokenRange(tokenRange), SSTableReadsListener.NOOP_LISTENER)) { assert bounds.size() == 1; @@ -714,7 +717,6 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); } } - } catch (IOException | CardinalityMergeException e) { diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index b2708b08b81a..3b7bf529ed31 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -565,4 +565,4 @@ public static Config load(String path) } return new YamlConfigurationLoader().loadConfig(url); } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index d80b26a7fc31..1f64638bd734 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -34,7 +34,6 @@ import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.marshal.UUIDType; -import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Locator; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; @@ -48,8 +47,6 @@ import org.apache.cassandra.tcm.membership.Location; import 
org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.utils.FBUtilities; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; @@ -64,7 +61,6 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.when; public class AutoRepairUtilsTest extends CQLTester { @@ -73,18 +69,12 @@ public class AutoRepairUtilsTest extends CQLTester static InetAddressAndPort localEndpoint; - @Mock - static Locator snitchMock; - - static Locator defaultSnitch; - @BeforeClass public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); setAutoRepairEnabled(true); requireNetwork(); - defaultSnitch = DatabaseDescriptor.getLocator(); localEndpoint = FBUtilities.getBroadcastAddressAndPort(); hostId = StorageService.instance.getHostIdForEndpoint(localEndpoint); StorageService.instance.doAutoRepairSetup(); @@ -98,7 +88,6 @@ public void setup() QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", "ks", "tbl")); AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); - MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); @@ -234,10 +223,6 @@ public void testGetHostIdsInCurrentRing_multiple_nodes() InetAddressAndPort ignoredEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); InetAddressAndPort deadEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 2); DatabaseDescriptor.getAutoRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); - //DatabaseDescriptor.setEndpointSnitch(snitchMock); - 
when(snitchMock.location(localEndpoint)).thenReturn(new Location("dc1", "rac1")); - when(snitchMock.location(ignoredEndpoint)).thenReturn(new Location("dc2", "rac1")); - when(snitchMock.location(deadEndpoint)).thenReturn(new Location("dc1", "rac1")); TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new NodeAddresses(deadEndpoint))); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index f1de19b6723d..8c0588cb1244 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -34,7 +34,6 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.StorageService; From 5686864be929720e53f1dd2db3e30aa916524145 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 3 Feb 2025 18:08:09 -0800 Subject: [PATCH 132/257] Remove commented code; use TokenRingUtils.getPrimaryRangesForEndpoint instead of StorageService.instance.getPrimaryRanges --- .../apache/cassandra/repair/autorepair/AutoRepairUtils.java | 1 - .../repair/autorepair/FixedSplitTokenRangeSplitter.java | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index e35f491366ed..f3f33e2c7a14 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -730,7 +730,6 @@ public static 
void addPriorityHosts(RepairType repairType, Set tableNames = repairPlan.getTableNames(); - Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(keyspaceName, FBUtilities.getBroadcastAddressAndPort()); if (!primaryRangeOnly) { // if we need to repair non-primary token ranges, then change the tokens accordingly From 1442b583df854e6acc88d8cae60bd88e76d26bef Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 18:58:55 -0600 Subject: [PATCH 133/257] Update RepairTokenRangeSplitter to work with BTI-formatted SSTables While doing a review pass on the CEP-37 PR I realized that RepairTokenRangeSplitter could not possibly work with the new BTI sstable format introduced in 5.0 because it explicitly checks for BigTableReader implementations. In addition, the way it calculates the amount of bytes a range covers in an SSTable is big-format specific. I noticed that a new utility method SSTableReader.onDiskSizeForPartitionPositions was added recently in CASSANDRA-20092 that can effectively calculate this for us in an implementation-agnostic way. This change updates the code to use this method, and also remove any changes we previously made in SSTableScanner and BigTableScanner for this. 
Patch by Andy Tolbert; Reviewed by ___ for CASSANDRA-20315 --- .../io/sstable/format/SSTableScanner.java | 10 ---- .../sstable/format/big/BigTableScanner.java | 45 ++---------------- .../autorepair/RepairTokenRangeSplitter.java | 46 ++++++------------- .../RepairTokenRangeSplitterTest.java | 30 +++++++----- 4 files changed, 37 insertions(+), 94 deletions(-) diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java index 001152d66d11..28035a85da0b 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -36,7 +35,6 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.AbstractBounds.Boundary; import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.AbstractRowIndexEntry; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.ISSTableScanner; @@ -85,14 +83,6 @@ protected SSTableScanner(S sstable, this.listener = listener; } - public static List> makeBounds(SSTableReader sstable, Collection> tokenRanges) - { - List> boundsList = new ArrayList<>(tokenRanges.size()); - for (Range range : Range.normalize(tokenRanges)) - addRange(sstable, Range.makeRowRange(range), boundsList); - return boundsList; - } - protected static List> makeBounds(SSTableReader sstable, DataRange dataRange) { List> boundsList = new ArrayList<>(2); diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java index b1e632e5b44c..83243529c4c9 100644 --- 
a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java @@ -20,8 +20,6 @@ import java.io.IOException; import java.util.Iterator; -import javax.annotation.Nullable; - import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; @@ -66,12 +64,9 @@ private BigTableScanner(BigTableReader sstable, this.rowIndexEntrySerializer = new RowIndexEntry.Serializer(sstable.descriptor.version, sstable.header, sstable.owner().map(SSTable.Owner::getMetrics).orElse(null)); } - // Helper method to seek to the index for the given position or range and optionally in the data file - private long seekAndProcess(@Nullable PartitionPosition position, - @Nullable AbstractBounds range, - boolean seekDataFile) throws CorruptSSTableException + private void seekToCurrentRangeStart() { - long indexPosition = sstable.getIndexScanPosition(position); + long indexPosition = sstable.getIndexScanPosition(currentRange.left); ifile.seek(indexPosition); try { @@ -80,21 +75,13 @@ private long seekAndProcess(@Nullable PartitionPosition position, { indexPosition = ifile.getFilePointer(); DecoratedKey indexDecoratedKey = sstable.decorateKey(ByteBufferUtil.readWithShortLength(ifile)); - - // Whether the position or range is present in the SSTable. - boolean isFound = (range == null && indexDecoratedKey.compareTo(position) > 0) - || (range != null && (indexDecoratedKey.compareTo(range.left) > 0 || range.contains(indexDecoratedKey))); - if (isFound) + if (indexDecoratedKey.compareTo(currentRange.left) > 0 || currentRange.contains(indexDecoratedKey)) { // Found, just read the dataPosition and seek into index and data files long dataPosition = RowIndexEntry.Serializer.readPosition(ifile); - // seek the index file position as we will presumably seek further when going to the next position. 
ifile.seek(indexPosition); - if (seekDataFile) - { - dfile.seek(dataPosition); - } - return dataPosition; + dfile.seek(dataPosition); + break; } else { @@ -107,28 +94,6 @@ private long seekAndProcess(@Nullable PartitionPosition position, sstable.markSuspect(); throw new CorruptSSTableException(e, sstable.getFilename()); } - // If for whatever reason we don't find position in file, just return 0 - return 0L; - } - - /** - * Seeks to the start of the current range and updates both the index and data file positions. - */ - private void seekToCurrentRangeStart() throws CorruptSSTableException - { - seekAndProcess(currentRange.left, currentRange, true); - } - - /** - * Gets the position in the data file, but does not seek to it. This does seek the index to find the data position - * but does not actually seek the data file. - * @param position position to find in data file. - * @return offset in data file where position exists. - * @throws CorruptSSTableException if SSTable was malformed - */ - public long getDataPosition(PartitionPosition position) throws CorruptSSTableException - { - return seekAndProcess(position, null, false); } protected void doClose() throws IOException diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 7f12020f3020..82a63caea1bc 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -38,10 +38,6 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.db.DataRange; -import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.io.sstable.SSTableReadsListener; -import org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.utils.FBUtilities; import 
org.slf4j.Logger; @@ -56,11 +52,9 @@ import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; import org.apache.cassandra.db.lifecycle.SSTableSet; import org.apache.cassandra.db.lifecycle.View; -import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.io.sstable.format.big.BigTableScanner; import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.io.util.FileUtils; @@ -210,7 +204,8 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of * data is written to a node per min_repair_interval. */ - private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ + private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) + {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. @@ -359,7 +354,8 @@ else if (tableAssignments.size() == 1 && long currentAssignmentsBytes = getEstimatedBytes(currentAssignments); long tableAssignmentsBytes = getEstimatedBytes(tableAssignments); // only add assignments together if they don't exceed max bytes per schedule. 
- if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule.toBytes()) { + if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule.toBytes()) + { currentAssignments.addAll(tableAssignments); } else @@ -677,6 +673,7 @@ private List getRangeSizeEstimate(String keyspace, String table, C @VisibleForTesting static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repairType, String keyspace, String table, Range tokenRange, Refs refs) { + List> singletonRange = Collections.singletonList(tokenRange); ICardinality cardinality = new HyperLogLogPlus(13, 25); long approxBytesInRange = 0L; long totalBytes = 0L; @@ -691,32 +688,14 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai if (metadata != null) cardinality = cardinality.merge(metadata.cardinalityEstimator); - long sstableSize = reader.bytesOnDisk(); + // use onDiskLength, which is the actual size of the SSTable data file. + long sstableSize = reader.onDiskLength(); totalBytes += sstableSize; - // get the bounds of the sstable for this range using the index file but do not actually read it. - List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - // Type check bounds to check if the range is not covered in the - // SSTable, in this case we will avoid incrementing approxBytesInRange. 
- if (!bounds.isEmpty() && reader instanceof BigTableReader) - { - try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, ColumnFilter.all(reader.metadata()), DataRange.forTokenRange(tokenRange), SSTableReadsListener.NOOP_LISTENER)) - { - assert bounds.size() == 1; - - AbstractBounds bound = bounds.get(0); - long startPosition = scanner.getDataPosition(bound.left); - long endPosition = scanner.getDataPosition(bound.right); - // If end position is 0 we can assume the sstable ended before that token, bound at size of file - if (endPosition == 0) - { - endPosition = sstableSize; - } - - long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); - approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); - } - } + // get the on disk size for the token range, note for compressed data this includes the full + // chunks the start and end ranges are found in. + long approximateRangeBytesInSSTable = reader.onDiskSizeForPartitionPositions(reader.getPositionsForRanges(singletonRange)); + approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); } catch (IOException | CardinalityMergeException e) { @@ -881,7 +860,8 @@ public SizedRepairAssignment(Range tokenRange, String keyspaceName, List< /** * @return Additional metadata about the repair assignment. 
*/ - public String getDescription() { + public String getDescription() + { return description; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index daeb6466fe2e..329973b81900 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -81,16 +81,19 @@ public void setUp() public void testSizePartitionCount() throws Throwable { insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); - assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); - SizeEstimate sizes = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); - assertEquals(10, sizes.partitions); + try (Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE)) + { + assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); + SizeEstimate sizes = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); + assertEquals(10, sizes.partitions); + } } @Test public void testSizePartitionCountSplit() throws Throwable { - int[] values = new int[10000]; + int partitionCount = 100_000; + int[] values = new int[partitionCount]; for (int i = 0; i < values.length; i++) values[i] = i + 1; insertAndFlushTable(tableName, values); @@ -99,12 +102,17 @@ public void testSizePartitionCountSplit() throws Throwable Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); - Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); - Refs sstables2 = 
RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2); - SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); - SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); - // +-5% because HLL merge and the applying of range size approx ratio causes estimation errors - assertTrue(Math.abs(10000 - (sizes1.partitions + sizes2.partitions)) <= 60); + try(Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) + { + SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); + SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); + + // +-5% because including entire compression blocks covering token range, HLL merge and the applying of range size approx ratio causes estimation errors + long allowableDelta = (long) (partitionCount * .05); + long estimatedPartitionDelta = Math.abs(partitionCount - (sizes1.partitions + sizes2.partitions)); + assertTrue("Partition count delta was +/-" + estimatedPartitionDelta + " but expected +/- " + allowableDelta, estimatedPartitionDelta <= allowableDelta); + } } @Test From 8c46cd5fddd006632165621169155da5856ffa55 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:04:19 -0600 Subject: [PATCH 134/257] Avoid possible division by zero --- .../repair/autorepair/RepairTokenRangeSplitter.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java 
b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 82a63caea1bc..b6dcfc61b4ae 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -702,9 +702,14 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai logger.error("Error calculating size estimate for {}.{} for range {} on {}", keyspace, table, tokenRange, reader, e); } } - double ratio = approxBytesInRange / (double) totalBytes; - // use the ratio from size to estimate the partitions in the range as well - long partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); + + long partitions = 0L; + if (totalBytes > 0) + { + // use the ratio from size to estimate the partitions in the range as well + double ratio = approxBytesInRange / (double) totalBytes; + partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); + } return new SizeEstimate(repairType, keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); } From 3bbf35f7993885447e50c8af92c37acb60dba692 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:18:41 -0600 Subject: [PATCH 135/257] Test with Bti and Big format sstables --- .../autorepair/RepairTokenRangeSplitterTest.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 329973b81900..065b948c481d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -31,6 +31,8 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import 
org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.apache.cassandra.config.DataStorageSpec.LongMebibytesBound; import org.apache.cassandra.config.DatabaseDescriptor; @@ -41,6 +43,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.io.sstable.format.bti.BtiFormat; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.FilteredRepairAssignments; import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.SizeEstimate; @@ -54,27 +57,36 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +@RunWith(Parameterized.class) public class RepairTokenRangeSplitterTest extends CQLTester { private RepairTokenRangeSplitter repairRangeSplitter; private String tableName; private static Range FULL_RANGE; + @Parameterized.Parameter() + public String sstableFormat; + + @Parameterized.Parameters(name = "sstableFormat={0}") + public static Collection sstableFormats() + { + return List.of(BtiFormat.NAME, BigFormat.NAME); + } + @BeforeClass public static void setUpClass() { CQLTester.setUpClass(); AutoRepairService.setup(); FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); - DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(BigFormat.NAME)); } @Before public void setUp() { + DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); - assertTrue(BigFormat.isSelected()); } @Test From c32e612ef5252f39981ed8c9f3ea9bd3816ca583 Mon Sep 17 00:00:00 2001 From: Andy 
Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:58:51 -0600 Subject: [PATCH 136/257] Assert correct format selected --- .../repair/autorepair/RepairTokenRangeSplitterTest.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 065b948c481d..d1f598682326 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -87,6 +87,15 @@ public void setUp() DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + // ensure correct format is selected. + if (sstableFormat.equalsIgnoreCase(BigFormat.NAME)) + { + assertTrue(BigFormat.isSelected()); + } + else + { + assertTrue(BtiFormat.isSelected()); + } } @Test From 4b2380d4ea4a250dc88a11f3bdf1ee334cd7851e Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 10 Feb 2025 15:59:30 -0800 Subject: [PATCH 137/257] Fix review comments from Andy --- conf/cassandra.yaml | 5 ++ conf/cassandra_latest.yaml | 12 ++++ .../org/apache/cassandra/config/Config.java | 2 +- .../apache/cassandra/config/DurationSpec.java | 2 +- .../cassandra/config/ParameterizedClass.java | 2 +- .../db/streaming/CassandraStreamReceiver.java | 10 ++-- .../FixedSplitTokenRangeSplitter.java | 3 +- .../service/ActiveRepairService.java | 3 + .../service/ActiveRepairServiceMBean.java | 4 ++ .../cassandra/service/StorageService.java | 2 +- .../tcm/sequences/BootstrapAndJoin.java | 5 +- .../tcm/sequences/BootstrapAndReplace.java | 5 +- .../tcm/sequences/ReplaceSameAddress.java | 5 +- 
.../apache/cassandra/utils/FBUtilities.java | 2 +- .../config/YamlConfigurationLoaderTest.java | 2 +- .../service/AutoRepairServiceSetterTest.java | 60 +++++++++++-------- 16 files changed, 81 insertions(+), 43 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 5b6e0dc51fc5..acb017ad8765 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,6 +2675,11 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s +# at least 20% of disk must be unused to run incremental repair +# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) +# then set the ratio to 0.0 +# incremental_repair_disk_headroom_reject_ratio: 0.2 + # Configuration for AutoRepair Scheduler. # auto_repair: # # Enable/Disable the auto-repair scheduler. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index ed99e9eece19..cfc03943d515 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2047,6 +2047,13 @@ report_unconfirmed_repaired_data_mismatches: false # Materialized views are considered experimental and are not recommended for production use. materialized_views_enabled: false +# Specify whether Materialized View mutations are replayed through the write path on streaming, e.g. repair. +# When enabled, Materialized View data streamed to the destination node will be written into commit log first. When setting to false, +# the streamed Materialized View data is written into SSTables just the same as normal streaming. The default is true. +# If this is set to false, streaming will be considerably faster however it's possible that, in extreme situations +# (losing > quorum # nodes in a replica set), you may have data in your SSTables that never makes it to the Materialized View. +# materialized_views_on_repair_enabled: true + # Enables SASI index creation on this node.
# SASI indexes are considered experimental and are not recommended for production use. sasi_indexes_enabled: false @@ -2363,6 +2370,11 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE +# at least 20% of disk must be unused to run incremental repair +# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) +# then set the ratio to 0.0 +# incremental_repair_disk_headroom_reject_ratio: 0.2 + # Configuration for AutoRepair Scheduler. # auto_repair: # # Enable/Disable the auto-repair scheduler. diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 9a0c45622167..1156675799d4 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -33,7 +33,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +43,7 @@ import org.apache.cassandra.index.internal.CassandraIndex; import org.apache.cassandra.io.compress.BufferType; import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.StartupChecks.StartupCheckType; import org.apache.cassandra.utils.StorageCompatibilityMode; diff --git a/src/java/org/apache/cassandra/config/DurationSpec.java b/src/java/org/apache/cassandra/config/DurationSpec.java index ee35de58e81b..969ba255ea6d 100644 --- a/src/java/org/apache/cassandra/config/DurationSpec.java +++ b/src/java/org/apache/cassandra/config/DurationSpec.java @@ -43,7 +43,7 @@ * users the opportunity to be able to provide config with a unit of their choice in cassandra.yaml as per the available * options.
(CASSANDRA-15234) */ -public abstract class DurationSpec implements Serializable +public abstract class DurationSpec { /** * The Regexp used to parse the duration provided as String. diff --git a/src/java/org/apache/cassandra/config/ParameterizedClass.java b/src/java/org/apache/cassandra/config/ParameterizedClass.java index a38dbc1d2f0b..24f7307057e3 100644 --- a/src/java/org/apache/cassandra/config/ParameterizedClass.java +++ b/src/java/org/apache/cassandra/config/ParameterizedClass.java @@ -34,7 +34,7 @@ import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; @Shared(scope = SIMULATION) -public class ParameterizedClass implements Serializable +public class ParameterizedClass { public static final String CLASS_NAME = "class_name"; public static final String PARAMETERS = "parameters"; diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 2c31b175679d..2ce86fcb715a 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -26,20 +26,18 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; -import org.apache.cassandra.io.sstable.SSTable; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.primitives.Ranges; +import io.github.jbellis.jvector.annotations.VisibleForTesting; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; import 
org.apache.cassandra.db.lifecycle.LifecycleTransaction; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.ThrottledUnfilteredIterator; @@ -49,6 +47,7 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; +import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableMultiWriter; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.accord.AccordService; @@ -208,7 +207,8 @@ private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) * For CDC-enabled tables and write path for CDC is enabled, we want to ensure that the mutations are * run through the CommitLog, so they can be archived by the CDC process on discard. */ - public boolean requiresWritePath(ColumnFamilyStore cfs) + @VisibleForTesting + boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) || cfs.streamToMemtable() diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 7596272e78fb..f4d09aa18d82 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -72,7 +72,8 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< @Override public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) { - return new RepairAssignmentIterator(repairPlans) { + return new RepairAssignmentIterator(repairPlans) + { @Override protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index e39af4339ac5..22d78794bcee 100644 
--- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -51,6 +51,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DurationSpec; diff --git a/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java b/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java index 851dc6c802bb..c739b048d68f 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java @@ -74,4 +74,8 @@ public interface ActiveRepairServiceMBean int parentRepairSessionsCount(); public int getPaxosRepairParallelism(); public void setPaxosRepairParallelism(int v); + + public double getIncrementalRepairDiskHeadroomRejectRatio(); + + public void setIncrementalRepairDiskHeadroomRejectRatio(double value); } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 741c2bed7fb3..bd9b67bc37b0 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1137,7 +1137,7 @@ public void doAutoRepairSetup() AutoRepairService.setup(); if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { - logger.info("Enable auto-repair scheduling"); + logger.info("Enabling auto-repair scheduling"); AutoRepair.instance.setup(); } logger.info("AutoRepair setup complete!"); diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 19d6cb50edae..0e32f18c2507 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ 
b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -221,8 +221,6 @@ public SequenceState executeNext() "For more, see `nodetool help bootstrap`. {}", SystemKeyspace.getBootstrapState()); return halted(); } - // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else @@ -246,6 +244,9 @@ public SequenceState executeNext() .filter(cfs -> Schema.instance.getUserKeyspaces().names().contains(cfs.keyspace.getName())) .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); ClusterMetadataService.instance().commit(midJoin); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } else { diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java index f3a3a8ff4577..8bc73d142eb9 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java @@ -218,8 +218,6 @@ public SequenceState executeNext() "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); return halted(); } - // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else @@ -243,6 +241,9 @@ public SequenceState executeNext() .filter(cfs -> Schema.instance.getUserKeyspaces().names().contains(cfs.keyspace.getName())) .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); ClusterMetadataService.instance().commit(midReplace); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } else { diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index 1c55d84ca244..4580d716a1ba 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -86,8 +86,6 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); throw new IllegalStateException("Could not finish join for during replacement"); } - // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } if (finishJoiningRing) @@ -98,6 +96,9 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); BootstrapAndReplace.gossipStateToNormal(metadata, metadata.myNodeId()); Gossiper.instance.mergeNodeToGossip(metadata.myNodeId(), metadata); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } } } diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 9d55b4ca8280..ed44b84164f1 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -66,7 +66,6 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; -import org.apache.cassandra.io.util.File; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,6 +88,7 @@ import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputBufferFixed; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.security.AbstractCryptoProvider; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 3b7bf529ed31..10075671e589 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ 
b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -39,6 +39,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.io.util.File; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.yaml.snakeyaml.error.YAMLException; import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_ALLOW_SYSTEM_PROPERTIES; @@ -46,7 +47,6 @@ import static org.apache.cassandra.config.YamlConfigurationLoader.SYSTEM_PROPERTY_PREFIX; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 2f00b38d8190..d3a99590de07 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service; import com.google.common.collect.ImmutableSet; + import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; @@ -28,6 +29,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; + import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -47,7 +49,8 @@ import static org.assertj.core.api.Assertions.assertThat; @RunWith(Parameterized.class) -public class AutoRepairServiceSetterTest extends CQLTester { +public class AutoRepairServiceSetterTest extends CQLTester +{ private static final 
AutoRepairConfig config = new AutoRepairConfig(true); @Parameterized.Parameter @@ -63,45 +66,51 @@ public class AutoRepairServiceSetterTest extends CQLTester { public Function getter; @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") - public static Collection testCases() { + public static Collection testCases() + { DatabaseDescriptor.setConfig(DatabaseDescriptor.loadConfig()); return Stream.of( - forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), - forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), - forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), - forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), - forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), - forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), - forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), - forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setRepairPriorityForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairUtils::getPriorityHosts), - forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setForceRepairForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairServiceSetterTest::isLocalHostForceRepair) + forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), + forEachRepairType(100, 
AutoRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setRepairPriorityForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairUtils::getPriorityHosts), + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setForceRepairForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairServiceSetterTest::isLocalHostForceRepair) ).flatMap(Function.identity()).collect(Collectors.toList()); } - private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) { + private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) + { UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( - "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + 
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); - if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) { + if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) + { return ImmutableSet.of(InetAddressAndPort.getLocalHost()); } return ImmutableSet.of(); } - private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) + { Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - testCases[repairType.ordinal()] = new Object[]{repairType, arg, setter, getter}; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + testCases[repairType.ordinal()] = new Object[]{ repairType, arg, setter, getter }; } return Arrays.stream(testCases); } @BeforeClass - public static void setup() throws Exception { + public static void setup() throws Exception + { DatabaseDescriptor.daemonInitialization(); setAutoRepairEnabled(true); requireNetwork(); @@ -112,13 +121,14 @@ public static void setup() throws Exception { } @Before - public void prepare() { + public void prepare() + { QueryProcessor.executeInternal(String.format( - "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); QueryProcessor.executeInternal(String.format( - "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } @Test From e98bb69689a5d98c91d438db2e9f7ac25283d02a Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 10 Feb 2025 
20:46:18 -0800 Subject: [PATCH 138/257] Fix review comments from Jaydeepkumar --- conf/cassandra.yaml | 3 +- conf/cassandra_latest.yaml | 3 +- .../db/streaming/CassandraStreamReceiver.java | 2 +- .../cassandra/metrics/AutoRepairMetrics.java | 1 - .../metrics/AutoRepairMetricsManager.java | 3 ++ .../repair/autorepair/AutoRepair.java | 3 ++ .../repair/autorepair/AutoRepairConfig.java | 3 ++ .../repair/autorepair/AutoRepairState.java | 4 ++- .../FixedSplitTokenRangeSplitter.java | 3 ++ .../cassandra/schema/AutoRepairParams.java | 3 ++ .../apache/cassandra/schema/TableParams.java | 3 +- .../cassandra/service/AutoRepairService.java | 3 ++ .../service/AutoRepairServiceMBean.java | 3 ++ .../org/apache/cassandra/tools/NodeProbe.java | 26 ++++++++-------- .../tools/nodetool/AutoRepairStatus.java | 5 ++- .../tools/nodetool/GetAutoRepairConfig.java | 3 ++ .../tools/nodetool/SSTableRepairedSet.java | 5 ++- .../tools/nodetool/SetAutoRepairConfig.java | 25 ++++++++------- .../test/repair/AutoRepairSchedulerTest.java | 3 ++ test/unit/org/apache/cassandra/Util.java | 2 +- .../CassandraStreamReceiverTest.java | 3 ++ .../AutoRepairConfigRepairTypeTest.java | 1 - .../autorepair/AutoRepairConfigTest.java | 3 ++ .../autorepair/AutoRepairKeyspaceTest.java | 3 ++ .../AutoRepairParameterizedTest.java | 3 ++ .../AutoRepairStateFactoryTest.java | 3 ++ .../autorepair/AutoRepairStateTest.java | 3 ++ .../repair/autorepair/AutoRepairTest.java | 3 ++ .../autorepair/AutoRepairUtilsTest.java | 3 ++ .../FixedSplitTokenRangeSplitterTest.java | 3 ++ .../autorepair/PrioritizedRepairPlanTest.java | 3 ++ .../RepairTokenRangeSplitterTest.java | 3 ++ .../autorepair/SSTableRepairedAtTest.java | 3 ++ .../service/AutoRepairServiceBasicTest.java | 3 ++ .../AutoRepairServiceRepairTypeTest.java | 3 ++ .../service/AutoRepairServiceSetterTest.java | 3 ++ .../tools/nodetool/AutoRepairStatusTest.java | 5 ++- .../nodetool/SSTableRepairedSetTest.java | 7 +++-- .../nodetool/SetAutoRepairConfigTest.java | 31 
++++++++++--------- 39 files changed, 143 insertions(+), 52 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index acb017ad8765..39e0ab776964 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,7 +2675,8 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s -# at least 20% of disk must be unused to run incremental repair +# At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during +# incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. # if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 # incremental_repair_disk_headroom_reject_ratio = 0.2; diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index cfc03943d515..da44d4f7b999 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2370,7 +2370,8 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE -# at least 20% of disk must be unused to run incremental repair +# At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during +# incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. 
# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 # incremental_repair_disk_headroom_reject_ratio = 0.2; diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 2ce86fcb715a..61f64ce3d0af 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Set; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; @@ -30,7 +31,6 @@ import org.slf4j.LoggerFactory; import accord.primitives.Ranges; -import io.github.jbellis.jvector.annotations.VisibleForTesting; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index b097dd3414c4..0f5cefd30899 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -91,7 +91,6 @@ public Integer getValue() } }); - longestUnrepairedSec = Metrics.register(factory.createMetricName("LongestUnrepairedSec"), new Gauge() { public Integer getValue() diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java index e293945c9846..e97ce34e5a73 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java @@ -23,6 +23,9 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +/** + * AutoRepair 
metrics manager holding all the auto-repair related metrics. + */ public class AutoRepairMetricsManager { private static final Map metrics = new ConcurrentHashMap<>(); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 46e84e59f25b..bd656a46340d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -63,6 +63,9 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +/** + * AutoRepair scheduler responsible for running different types of repairs. + */ public class AutoRepair { private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 80381af73139..19c3029011de 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -39,6 +39,9 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.FBUtilities; +/** + * Defines configurations for AutoRepair. + */ public class AutoRepairConfig implements Serializable { // Enable/Disable the auto-repair scheduler. 
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index c8f90908b2e8..dad7043d4fa7 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -55,7 +55,9 @@ import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; -// AutoRepairState represents the state of automated repair for a given repair type. +/** + * AutoRepairState represents the state of automated repair for a given repair type. + */ public abstract class AutoRepairState implements ProgressListener { protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index f4d09aa18d82..a8d8b4744484 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -38,6 +38,9 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; +/** + * An implementation that splits token ranges into a fixed number of subranges. 
+ */ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index 105b9f6d0ddd..b2de800f83e0 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -33,6 +33,9 @@ import static java.lang.String.format; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; +/** + * AutoRepair table parameters - used to define the auto-repair configuration for a table. + */ public final class AutoRepairParams { public enum Option diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 7bb1a6a54e6d..df295b26b232 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -198,8 +198,7 @@ public static Builder builder(TableParams params) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) .pendingDrop(params.pendingDrop) - .automatedRepair(params.autoRepair) - ; + .automatedRepair(params.autoRepair); } public Builder unbuild() diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 1f7004c31d48..1c96ab685d2c 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -36,6 +36,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; +/** + * Implements all the MBeans for AutoRepair.
+ */ public class AutoRepairService implements AutoRepairServiceMBean { public static final String MBEAN_NAME = "org.apache.cassandra.db:type=AutoRepairService"; diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index be6cedba9329..40948f3711a5 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -20,6 +20,9 @@ import java.util.Set; +/** + * Defines all the MBeans exposed for AutoRepair. + */ public interface AutoRepairServiceMBean { /** diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 872aab37392f..a58f26c9aaf2 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2581,22 +2581,22 @@ public void setAutoRepairEnabled(String repairType, boolean enabled) autoRepairProxy.setAutoRepairEnabled(repairType, enabled); } - public void setRepairThreads(String repairType, int repairThreads) + public void setAutoRepairThreads(String repairType, int repairThreads) { autoRepairProxy.setRepairThreads(repairType, repairThreads); } - public void setRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) + public void setAutoRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) { autoRepairProxy.setRepairPriorityForHosts(repairType, commaSeparatedHostSet); } - public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet) + public void setAutoRepairForceRepairForHosts(String repairType, String commaSeparatedHostSet) { autoRepairProxy.setForceRepairForHosts(repairType, commaSeparatedHostSet); } - public void setRepairMinInterval(String repairType, String minRepairInterval) + public void setAutoRepairMinInterval(String repairType, String minRepairInterval) { 
autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); } @@ -2606,7 +2606,7 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); } - public void startScheduler() + public void startAutoRepairScheduler() { autoRepairProxy.startScheduler(); } @@ -2626,7 +2626,7 @@ public void setAutoRepairMinRepairTaskDuration(String duration) autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); } - public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold) + public void setAutoRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold) { autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); } @@ -2641,22 +2641,22 @@ public void setAutoRepairIgnoreDCs(String repairType, Set ignoreDCs) autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); } - public void setParallelRepairPercentage(String repairType, int percentage) + public void setAutoRepairParallelRepairPercentage(String repairType, int percentage) { autoRepairProxy.setParallelRepairPercentage(repairType, percentage); } - public void setParallelRepairCount(String repairType, int count) + public void setAutoRepairParallelRepairCount(String repairType, int count) { autoRepairProxy.setParallelRepairCount(repairType, count); } - public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) + public void setAutoRepairPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) { autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); } - public void setMaterializedViewRepairEnabled(String repairType, boolean enabled) + public void setAutoRepairMaterializedViewRepairEnabled(String repairType, boolean enabled) { autoRepairProxy.setMVRepairEnabled(repairType, enabled); } @@ -2666,17 +2666,17 @@ public List mutateSSTableRepairedState(boolean repair, boolean 
preview, return ssProxy.mutateSSTableRepairedState(repair, preview, keyspace, tables); } - public List getTablesForKeyspace(String keyspace) + public List getAutoRepairTablesForKeyspace(String keyspace) { return ssProxy.getTablesForKeyspace(keyspace); } - public void setRepairSessionTimeout(String repairType, String timeout) + public void setAutoRepairSessionTimeout(String repairType, String timeout) { autoRepairProxy.setRepairSessionTimeout(repairType, timeout); } - public Set getOnGoingRepairHostIds(String repairType) + public Set getAutoRepairOnGoingRepairHostIds(String repairType) { return autoRepairProxy.getOnGoingRepairHostIds(repairType); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java index 22f9afc43fe1..bb594a010ff1 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -31,6 +31,9 @@ import static com.google.common.base.Preconditions.checkArgument; +/** + * Provides currently running auto-repair tasks. 
+ */ @Command(name = "autorepairstatus", description = "Print autorepair status") public class AutoRepairStatus extends NodeTool.NodeToolCmd { @@ -52,7 +55,7 @@ public void execute(NodeProbe probe) TableBuilder table = new TableBuilder(); table.add("Active Repairs"); - Set ongoingRepairHostIds = probe.getOnGoingRepairHostIds(repairType); + Set ongoingRepairHostIds = probe.getAutoRepairOnGoingRepairHostIds(repairType); table.add(getSetString(ongoingRepairHostIds)); table.printTo(out); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 961fb71fb4e4..9744498de757 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -25,6 +25,9 @@ import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; +/** + * Prints all the configurations for AutoRepair through nodetool. + */ @Command(name = "getautorepairconfig", description = "Print autorepair configurations") public class GetAutoRepairConfig extends NodeToolCmd { diff --git a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java index e1773fe47b7f..2a7b56732ac9 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java @@ -31,6 +31,9 @@ import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool; +/** + * Provides a way to set the repaired state of SSTables without any downtime through nodetool. 
+ */ @Command(name = "sstablerepairedset", description = "Set the repaired state of SSTables for given keyspace/tables") public class SSTableRepairedSet extends NodeTool.NodeToolCmd { @@ -87,7 +90,7 @@ public void execute(NodeProbe probe) { sstableList.addAll(probe.mutateSSTableRepairedState(isRepaired, !reallySet, keyspace, tables.isEmpty() - ? probe.getTablesForKeyspace(keyspace) // mutate all tables + ? probe.getAutoRepairTablesForKeyspace(keyspace) // mutate all tables : tables)); // mutate specific tables } catch (InvalidRequestException e) diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index d55685e72426..9f22a43c4eb9 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -35,6 +35,9 @@ import static com.google.common.base.Preconditions.checkArgument; +/** + * Allows setting the AutoRepair configuration through nodetool.
+ */ @Command(name = "setautorepairconfig", description = "sets the autorepair configuration") public class SetAutoRepairConfig extends NodeToolCmd { @@ -77,7 +80,7 @@ public void execute(NodeProbe probe) case "start_scheduler": if (Boolean.parseBoolean(paramVal)) { - probe.startScheduler(); + probe.startAutoRepairScheduler(); } return; case "history_clear_delete_hosts_buffer_interval": @@ -113,13 +116,13 @@ public void execute(NodeProbe probe) probe.setAutoRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "number_of_repair_threads": - probe.setRepairThreads(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairThreads(repairTypeStr, Integer.parseInt(paramVal)); break; case "min_repair_interval": - probe.setRepairMinInterval(repairTypeStr, paramVal); + probe.setAutoRepairMinInterval(repairTypeStr, paramVal); break; case "sstable_upper_threshold": - probe.setRepairSSTableCountHigherThreshold(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairSSTableCountHigherThreshold(repairTypeStr, Integer.parseInt(paramVal)); break; case "table_max_repair_time": probe.setAutoRepairTableMaxRepairTime(repairTypeStr, paramVal); @@ -127,11 +130,11 @@ public void execute(NodeProbe probe) case "priority_hosts": if (paramVal!= null && !paramVal.isEmpty()) { - probe.setRepairPriorityForHosts(repairTypeStr, paramVal); + probe.setAutoRepairPriorityForHosts(repairTypeStr, paramVal); } break; case "forcerepair_hosts": - probe.setForceRepairForHosts(repairTypeStr, paramVal); + probe.setAutoRepairForceRepairForHosts(repairTypeStr, paramVal); break; case "ignore_dcs": Set ignoreDCs = new HashSet<>(); @@ -142,19 +145,19 @@ public void execute(NodeProbe probe) probe.setAutoRepairIgnoreDCs(repairTypeStr, ignoreDCs); break; case "repair_primary_token_range_only": - probe.setPrimaryTokenRangeOnly(repairTypeStr, Boolean.parseBoolean(paramVal)); + probe.setAutoRepairPrimaryTokenRangeOnly(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case 
"parallel_repair_count": - probe.setParallelRepairCount(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairParallelRepairCount(repairTypeStr, Integer.parseInt(paramVal)); break; case "parallel_repair_percentage": - probe.setParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); break; case "materialized_view_repair_enabled": - probe.setMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); + probe.setAutoRepairMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "repair_session_timeout": - probe.setRepairSessionTimeout(repairTypeStr, paramVal); + probe.setAutoRepairSessionTimeout(repairTypeStr, paramVal); break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 67ba2b6009f1..39523892d6b2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -42,6 +42,9 @@ import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.junit.Assert.assertEquals; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ public class AutoRepairSchedulerTest extends TestBaseImpl { diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 4b2486255adc..2bd1ae43116f 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -25,8 +25,8 @@ import java.io.IOError; import java.io.IOException; import java.io.InputStream; -import java.math.BigInteger; import java.lang.reflect.Field; 
+import java.math.BigInteger; import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java index 1b66086a4b71..8bf96329bacb 100644 --- a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java @@ -37,6 +37,9 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.db.streaming.CassandraStreamReceiver} + */ public class CassandraStreamReceiverTest extends CQLTester { @Mock diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java index be14b8b912ea..ec36bb5cfd0c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.repair.autorepair; - import org.junit.Assert; import org.junit.Test; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 3bef45b8bd53..29e564cc3e12 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -44,6 +44,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig} + */ @RunWith(Parameterized.class) public class AutoRepairConfigTest extends CQLTester { diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 1337cf3dd2d3..241b1ac0de04 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -29,6 +29,9 @@ import org.apache.cassandra.config.DatabaseDescriptor; +/** + * Unit tests for {@link org.apache.cassandra.schema.SystemDistributedKeyspace} + */ public class AutoRepairKeyspaceTest { @BeforeClass diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 9e80bae37e59..d9471a5b7aef 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -78,6 +78,9 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ @RunWith(Parameterized.class) public class AutoRepairParameterizedTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index 15d62c536a06..97e80364eed7 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -26,6 +26,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType} + */ public class AutoRepairStateFactoryTest { @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 63e1ee4aaa0d..b4bb20c7eafd 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -49,6 +49,9 @@ import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairState} + */ @RunWith(Parameterized.class) public class AutoRepairStateTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 0accb33b0d37..caae31c69db1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -42,6 +42,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ public class AutoRepairTest extends CQLTester { @BeforeClass diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 1f64638bd734..79b1ef8096df 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -62,6 +62,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairUtils} + */ public class AutoRepairUtilsTest extends CQLTester { static RepairType repairType = RepairType.INCREMENTAL; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 360a270908e5..6326f55f9d5c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -51,6 +51,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter} + */ @RunWith(Parameterized.class) public class FixedSplitTokenRangeSplitterTest { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java index 7b95acb879de..3c4efc25ebab 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -30,6 +30,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.PrioritizedRepairPlan} + */ public class PrioritizedRepairPlanTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index d1f598682326..2470527c401a 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -57,6 +57,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter} + */ @RunWith(Parameterized.class) public class RepairTokenRangeSplitterTest extends CQLTester { diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index 8c0588cb1244..26727f1b30b6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -44,6 +44,9 @@ import static org.junit.Assert.fail; +/** + * Unit tests to cover AutoRepair functionality inside {@link org.apache.cassandra.service.StorageService} + */ public class SSTableRepairedAtTest extends CQLTester { public static final String TEST_KEYSPACE = "test_keyspace"; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 358a9af78f86..b08e83a667d0 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -28,6 +28,9 @@ import static org.junit.Assert.assertEquals; +/** + * Unit tests for {@link org.apache.cassandra.service.AutoRepairService} + */ public class AutoRepairServiceBasicTest extends CQLTester { private static AutoRepairService autoRepairService; private static AutoRepairConfig config; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index fccac4762cb3..9288468b0019 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -37,6 +37,9 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.junit.Assert.assertEquals; +/** + * Unit tests covering different repair types for {@link org.apache.cassandra.service.AutoRepairService} + */ @RunWith(Parameterized.class) public class 
AutoRepairServiceRepairTypeTest extends CQLTester { @Parameterized.Parameter() diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index d3a99590de07..db87e995f558 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -48,6 +48,9 @@ import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.assertj.core.api.Assertions.assertThat; +/** + * Unit tests for (updating parameters through JMX) {@link org.apache.cassandra.service.AutoRepairService} + */ @RunWith(Parameterized.class) public class AutoRepairServiceSetterTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index 1c7b04d50bcb..82293581d807 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -40,6 +40,9 @@ import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.AutoRepairStatus} + */ @RunWith(Parameterized.class) public class AutoRepairStatusTest { @@ -94,7 +97,7 @@ public void testExecuteWithNoNodes() @Test public void testExecute() { - when(probe.getOnGoingRepairHostIds(repairType.name())).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); + when(probe.getAutoRepairOnGoingRepairHostIds(repairType.name())).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); cmd.repairType = repairType.name(); cmd.execute(probe); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java index bc168c0752d7..edd57447ad13 100644 
--- a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -39,6 +39,9 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.SSTableRepairedSet} + */ public class SSTableRepairedSetTest { @Mock @@ -58,8 +61,8 @@ public void setUp() { public void testNoKeyspace() { when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); - when(probe.getTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); - when(probe.getTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(Arrays.asList("table3", "table4"))); + when(probe.getAutoRepairTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); + when(probe.getAutoRepairTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(Arrays.asList("table3", "table4"))); cmd.isRepaired = true; cmd.reallySet = true; diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 335750a2f2b3..fab128fd299c 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -45,6 +45,9 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.SetAutoRepairConfig} + */ @RunWith(Suite.class) @Suite.SuiteClasses({ SetAutoRepairConfigTest.NoParamTests.class, SetAutoRepairConfigTest.RepairTypeParamTests.class, SetAutoRepairConfigTest.RepairTypeAndArgsParamsTests.class }) @@ -119,13 +122,13 @@ public void testStartScheduler() cmd.execute(probe); - verify(probe,
times(0)).startScheduler(); + verify(probe, times(0)).startAutoRepairScheduler(); cmd.args = ImmutableList.of("start_scheduler", "true"); cmd.execute(probe); - verify(probe, times(1)).startScheduler(); + verify(probe, times(1)).startAutoRepairScheduler(); } @Test @@ -186,7 +189,7 @@ public void testRepairSchedulingDisabled() cmd.execute(probe); verify(out, times(1)).println("Auto-repair is not enabled"); - verify(probe, times(0)).setRepairThreads(repairType.name(), 1); + verify(probe, times(0)).setAutoRepairThreads(repairType.name(), 1); } @Test @@ -197,7 +200,7 @@ public void testRepairTypeDisabled() cmd.execute(probe); - verify(probe, times(1)).setRepairThreads(repairType.name(), 1); + verify(probe, times(1)).setAutoRepairThreads(repairType.name(), 1); } @@ -218,7 +221,7 @@ public void testV2FlagMissing() // expected } - verify(probe, times(0)).setRepairThreads(repairType.name(), 0); + verify(probe, times(0)).setAutoRepairThreads(repairType.name(), 0); } @Test(expected = IllegalArgumentException.class) @@ -239,7 +242,7 @@ public void testPriorityHosts() cmd.execute(probe); - verify(probe, times(1)).setRepairPriorityForHosts(repairType.name(), commaSeparatedHostSet); + verify(probe, times(1)).setAutoRepairPriorityForHosts(repairType.name(), commaSeparatedHostSet); } @Test @@ -251,7 +254,7 @@ public void testForceRepairHosts() cmd.execute(probe); - verify(probe, times(1)).setForceRepairForHosts(repairType.name(), commaSeparatedHostSet); + verify(probe, times(1)).setAutoRepairForceRepairForHosts(repairType.name(), commaSeparatedHostSet); } } @@ -275,14 +278,14 @@ public static Collection testCases() { return Stream.of( forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type.name(), true)), - forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type.name(), 1)), - forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type.name(), 
"3h")), - forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type.name(), 4)), + forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setAutoRepairThreads(type.name(), 1)), + forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setAutoRepairMinInterval(type.name(), "3h")), + forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setAutoRepairSSTableCountHigherThreshold(type.name(), 4)), forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type.name(), "5s")), - forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type.name(), true)), - forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type.name(), 6)), - forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type.name(), 7)), - forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setMaterializedViewRepairEnabled(type.name(), true)), + forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setAutoRepairPrimaryTokenRangeOnly(type.name(), true)), + forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairCount(type.name(), 6)), + forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairPercentage(type.name(), 7)), + forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairMaterializedViewRepairEnabled(type.name(), true)), forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type.name(), ImmutableSet.of("dc1", "dc2"))), 
forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")) ).flatMap(Function.identity()).collect(Collectors.toList()); From f225ad67734bf7b52a2d7c7e690eee97090c412a Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 11 Feb 2025 15:34:20 -0800 Subject: [PATCH 139/257] Fix review comments from Andy --- src/java/org/apache/cassandra/config/ParameterizedClass.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/java/org/apache/cassandra/config/ParameterizedClass.java b/src/java/org/apache/cassandra/config/ParameterizedClass.java index 24f7307057e3..d772629f194c 100644 --- a/src/java/org/apache/cassandra/config/ParameterizedClass.java +++ b/src/java/org/apache/cassandra/config/ParameterizedClass.java @@ -20,7 +20,6 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.Collections; -import java.io.Serializable; import java.util.List; import java.util.Map; import java.util.function.Predicate; From 2e9cea6f50ea349bf69b03baeadbd59cb55a3cd3 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 11 Feb 2025 17:04:09 -0800 Subject: [PATCH 140/257] Additional review comments from Jaydeepkumar --- .../cassandra/metrics/AutoRepairMetrics.java | 21 +++++++++---------- .../repair/autorepair/AutoRepairConfig.java | 4 ++-- .../repair/autorepair/AutoRepairState.java | 4 ---- .../repair/autorepair/AutoRepairUtils.java | 11 ---------- .../FixedSplitTokenRangeSplitter.java | 3 --- .../autorepair/RepairAssignmentIterator.java | 1 - .../cassandra/schema/AutoRepairParams.java | 3 +-- .../cassandra/schema/SchemaKeyspace.java | 4 +--- .../schema/SystemDistributedKeyspace.java | 1 - .../apache/cassandra/schema/TableParams.java | 3 +-- .../service/AutoRepairServiceMBean.java | 3 --- .../org/apache/cassandra/tools/NodeProbe.java | 1 - .../apache/cassandra/utils/FBUtilities.java | 2 +- 
.../test/repair/AutoRepairSchedulerTest.java | 1 - .../config/YamlConfigurationLoaderTest.java | 10 ++++----- .../autorepair/AutoRepairConfigTest.java | 2 +- .../autorepair/AutoRepairKeyspaceTest.java | 4 +++- .../AutoRepairParameterizedTest.java | 1 - .../autorepair/AutoRepairUtilsTest.java | 4 +--- .../autorepair/PrioritizedRepairPlanTest.java | 2 -- .../RepairTokenRangeSplitterTest.java | 1 - .../autorepair/SSTableRepairedAtTest.java | 1 + .../service/AutoRepairServiceBasicTest.java | 2 -- .../AutoRepairServiceRepairTypeTest.java | 1 - .../nodetool/SetAutoRepairConfigTest.java | 1 - 25 files changed, 27 insertions(+), 64 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index 0f5cefd30899..ee8a0abfea70 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -33,20 +33,19 @@ public class AutoRepairMetrics { public static final String TYPE_NAME = "autorepair"; - public Gauge repairsInProgress; - public Gauge nodeRepairTimeInSec; - public Gauge clusterRepairTimeInSec; - public Gauge longestUnrepairedSec; - public Gauge succeededTokenRangesCount; - public Gauge failedTokenRangesCount; - public Gauge skippedTokenRangesCount; - public Gauge skippedTablesCount; + public final Gauge repairsInProgress; + public final Gauge nodeRepairTimeInSec; + public final Gauge clusterRepairTimeInSec; + public final Gauge longestUnrepairedSec; + public final Gauge succeededTokenRangesCount; + public final Gauge failedTokenRangesCount; + public final Gauge skippedTokenRangesCount; + public final Gauge skippedTablesCount; + public final Gauge totalMVTablesConsideredForRepair; + public final Gauge totalDisabledRepairTables; public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; - public Gauge totalMVTablesConsideredForRepair; - public 
Gauge totalDisabledRepairTables; - public AutoRepairMetrics(RepairType repairType) { AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 19c3029011de..069a7378fa16 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -453,8 +453,8 @@ protected static Options getDefaultOptions() opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.materialized_view_repair_enabled = false; opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); - opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes - opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours + opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); + opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); return opts; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index dad7043d4fa7..3f987f90c652 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -83,17 +83,14 @@ public abstract class AutoRepairState implements ProgressListener protected int totalMVTablesConsideredForRepair = 0; @VisibleForTesting protected int totalDisabledTablesRepairCount = 0; - @VisibleForTesting protected int failedTokenRangesCount = 0; @VisibleForTesting protected int succeededTokenRangesCount = 0; @VisibleForTesting protected int skippedTokenRangesCount = 0; - @VisibleForTesting protected int skippedTablesCount = 0; - @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; 
@VisibleForTesting @@ -114,7 +111,6 @@ protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption opti { RepairCoordinator task = new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace); - task.addProgressListener(this); return task; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index f3f33e2c7a14..19f0558bea64 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -136,7 +136,6 @@ public class AutoRepairUtils COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_FINISH_REPAIR_HISTORY = String.format( - "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); @@ -183,10 +182,8 @@ public static void setup() .forInternalCalls()); selectStatementRepairPriority = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_PRIORITY, ClientState .forInternalCalls()); - selectLastRepairTimeForNode = (SelectStatement) QueryProcessor.getStatement(SELECT_LAST_REPAIR_TIME_FOR_NODE, ClientState .forInternalCalls()); - delStatementPriorityStatus = (ModificationStatement) QueryProcessor.getStatement(DEL_REPAIR_PRIORITY, ClientState .forInternalCalls()); addPriorityHost = (ModificationStatement) QueryProcessor.getStatement(ADD_PRIORITY_HOST, ClientState @@ -390,14 +387,11 @@ public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) ByteBufferUtil.bytes(repairType.toString()), ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); - UntypedResultSet repairTime = UntypedResultSet.create(rows.result); - if (repairTime.isEmpty()) { return 0; } - return 
repairTime.one().getLong(COL_REPAIR_FINISH_TS); } @@ -562,11 +556,9 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, autoRepairHistories == null ? 0 : autoRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); - if (currentRepairStatus == null || parallelRepairNumber > currentRepairStatus.hostIdsWithOnGoingRepair.size()) { // more repairs can be run, I might be the new one - if (autoRepairHistories != null) { logger.info("Auto repair history table has {} records", autoRepairHistories.size()); @@ -608,13 +600,11 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) "First node in priority list is {}", ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(priorityHostId))); return NOT_MY_TURN; } - if (myId.equals(priorityHostId)) { //I have a priority for repair hence its my turn now return MY_TURN_DUE_TO_PRIORITY; } - // get the longest unrepaired node from the nodes which are not running repair AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); //check who is next, which is helpful for debugging @@ -815,7 +805,6 @@ public static boolean shouldConsiderKeyspace(Keyspace ks) return repair; } - public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long startTime) { long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index a8d8b4744484..e27b8188bd2f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -77,7 +77,6 @@ public Iterator getRepairAssignments(boolean primaryR { return new 
RepairAssignmentIterator(repairPlans) { - @Override protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) { @@ -127,7 +126,6 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primar } } } - return new KeyspaceRepairAssignments(priority, keyspaceName, repairAssignments); } @@ -138,7 +136,6 @@ public void setParameter(String key, String value) { throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be " + NUMBER_OF_SUBRANGES); } - logger.info("Setting {} to {} for repair type {}", key, value, repairType); this.numberOfSubranges = Integer.parseInt(value); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java index fc5573a48944..44d9f5ef5e55 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java @@ -51,7 +51,6 @@ private synchronized Iterator currentIterator() currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); } } - return currentIterator; } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index b2de800f83e0..a1111a84c51a 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -52,7 +52,7 @@ public String toString() } } - private ImmutableMap options; + private final ImmutableMap options; public static final Map DEFAULT_OPTIONS = ImmutableMap.of( LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.name()), Boolean.toString(true), @@ -152,7 +152,6 @@ public static boolean isValidInt(String value) return StringUtils.isNumeric(value); } - public Map options() { return options; diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java 
b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index f7fe80d83d4c..21fad889d108 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -598,7 +598,6 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("extensions", params.extensions) .add("auto_repair", params.autoRepair.asMap()); - // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ // node sends table schema to a < 3.8 versioned node with an unknown column. if (DatabaseDescriptor.isCDCEnabled()) @@ -1087,8 +1086,7 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))) -; + .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index f9c023e33b32..a1d03bbf8e03 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -184,7 +184,6 @@ private SystemDistributedKeyspace() private static final TableMetadata AutoRepairPriorityTable = parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", AUTO_REPAIR_PRIORITY_CQL).build(); - private static TableMetadata.Builder parse(String table, String description, String cql) { return CreateTableStatement.parse(format(cql, table), SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 
df295b26b232..ffdfcccedf2d 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -101,8 +101,7 @@ public enum Option TRANSACTIONAL_MODE, TRANSACTIONAL_MIGRATION_FROM, PENDING_DROP, - AUTO_REPAIR, - ; + AUTO_REPAIR; @Override public String toString() diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 40948f3711a5..f3ec919c8f1b 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -25,9 +25,6 @@ */ public interface AutoRepairServiceMBean { - /** - * Enable or disable auto-repair for a given repair type - */ public void setAutoRepairEnabled(String repairType, boolean enabled); public void setRepairThreads(String repairType, int repairThreads); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index a58f26c9aaf2..cab5153a70d3 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -182,7 +182,6 @@ public class NodeProbe implements AutoCloseable protected PermissionsCacheMBean pcProxy; protected RolesCacheMBean rcProxy; protected AutoRepairServiceMBean autoRepairProxy; - protected Output output; private boolean failed; diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index ed44b84164f1..fd7d24c1e502 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -65,7 +65,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; - +import com.vdurmont.semver4j.Semver; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 39523892d6b2..c6fa37f54dd7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -47,7 +47,6 @@ */ public class AutoRepairSchedulerTest extends TestBaseImpl { - private static Cluster cluster; static SimpleDateFormat sdf; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 10075671e589..ace51ebe2ce8 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -278,11 +278,11 @@ public void fromMapTest() "optional", false, "enabled", true); Map autoRepairConfig = ImmutableMap.of("enabled", true, - "global_settings", ImmutableMap.of("number_of_repair_threads", - 1), - "repair_type_overrides", ImmutableMap.of( - "full", ImmutableMap.of("number_of_repair_threads", - 2))); + "global_settings", + ImmutableMap.of("number_of_repair_threads", 1), + "repair_type_overrides", + ImmutableMap.of("full", + ImmutableMap.of("number_of_repair_threads", 2))); Map map = new ImmutableMap.Builder() .put("storage_port", storagePort) .put("commitlog_sync", commitLogSync) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 29e564cc3e12..e8c60f2d9b50 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -52,7 +52,7 @@ public class AutoRepairConfigTest extends CQLTester { 
private AutoRepairConfig config; - private Set testSet = ImmutableSet.of("dc1"); + private final Set testSet = ImmutableSet.of("dc1"); @Parameterized.Parameter public AutoRepairConfig.RepairType repairType; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 241b1ac0de04..ac9dac8236e8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -22,7 +22,9 @@ import java.util.Iterator; import java.util.Set; -import org.apache.cassandra.schema.*; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.schema.TableMetadata; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index d9471a5b7aef..cca5513e3712 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -728,7 +728,6 @@ public void testRepairThrowsForIRWithMVReplay() } } - @Test public void testRepairThrowsForIRWithCDCReplay() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 79b1ef8096df..d79f0cb6b4c8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -67,7 +67,7 @@ */ public class AutoRepairUtilsTest extends CQLTester { - static RepairType repairType = RepairType.INCREMENTAL; + static final RepairType repairType = 
RepairType.INCREMENTAL; static UUID hostId; static InetAddressAndPort localEndpoint; @@ -132,7 +132,6 @@ public void testSetForceRepairNewNode() assertTrue(result.one().getBoolean(COL_FORCE_REPAIR)); } - @Test public void testClearDeleteHosts() { @@ -262,7 +261,6 @@ public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() assertEquals(2, count); } - @Test public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java index 3c4efc25ebab..38f9c8538846 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -35,7 +35,6 @@ */ public class PrioritizedRepairPlanTest extends CQLTester { - @Test public void testBuildWithDifferentPriorities() { @@ -162,5 +161,4 @@ public void testBuildWithOneTable() assertEquals(5, prioritizedRepairPlans.get(0).getPriority()); assertEquals(table1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); } - } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 2470527c401a..7da664e13c91 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -377,7 +377,6 @@ private SizeEstimate sizeEstimateByBytes(LongMebibytesBound sizeInRange, LongMeb return new SizeEstimate(RepairType.INCREMENTAL, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); } - private void insertAndFlushSingleTable() throws Throwable { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index 26727f1b30b6..14677490ca2d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -78,6 +78,7 @@ public void clearData() table2 = Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table2"); assert table2 != null; } + @Test public void testGetTablesForKeyspace() { diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index b08e83a667d0..821a39d9e1e6 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -67,7 +67,6 @@ public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); } - @Test public void testsetAutoRepairMaxRetriesCount() { autoRepairService.setAutoRepairMaxRetriesCount(101); @@ -75,7 +74,6 @@ public void testsetAutoRepairMaxRetriesCount() { assertEquals(101, config.getRepairMaxRetries()); } - @Test public void testsetAutoRepairRetryBackoffInSec() { autoRepairService.setAutoRepairRetryBackoff("102s"); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index 9288468b0019..f2ee61930b09 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -55,7 +55,6 @@ public static Collection repairTypes() { return Arrays.asList(AutoRepairConfig.RepairType.values()); } - @BeforeClass public static void setupClass() throws Exception { 
SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index fab128fd299c..5a32c8285d48 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -203,7 +203,6 @@ public void testRepairTypeDisabled() verify(probe, times(1)).setAutoRepairThreads(repairType.name(), 1); } - @Test public void testV2FlagMissing() { From 65df16d00a97fd704dd2fec3d05227bb3d7ad8ad Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 13 Feb 2025 11:32:43 -0800 Subject: [PATCH 141/257] Additional review comments from Andy --- .../service/AutoRepairServiceBasicTest.java | 42 ++++++++++++------- .../AutoRepairServiceRepairTypeTest.java | 17 +++++--- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 821a39d9e1e6..bd6fcd608ba3 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -31,12 +31,14 @@ /** * Unit tests for {@link org.apache.cassandra.service.AutoRepairService} */ -public class AutoRepairServiceBasicTest extends CQLTester { +public class AutoRepairServiceBasicTest extends CQLTester +{ private static AutoRepairService autoRepairService; private static AutoRepairConfig config; @Before - public void setUp() { + public void setUp() + { DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsEnabled(false); @@ -47,7 +49,8 @@ public void setUp() { } @Test - public void testSetup() { + public void testSetup() + { AutoRepairService.instance.config = null; 
AutoRepairService.setup(); @@ -56,40 +59,46 @@ public void testSetup() { } @Test - public void testGetAutoRepairConfigReturnsConfig() { + public void testGetAutoRepairConfigReturnsConfig() + { assertEquals(config, autoRepairService.getAutoRepairConfig()); } @Test - public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { + public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() + { autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); } @Test - public void testsetAutoRepairMaxRetriesCount() { + public void testsetAutoRepairMaxRetriesCount() + { autoRepairService.setAutoRepairMaxRetriesCount(101); assertEquals(101, config.getRepairMaxRetries()); } @Test - public void testsetAutoRepairRetryBackoffInSec() { + public void testsetAutoRepairRetryBackoffInSec() + { autoRepairService.setAutoRepairRetryBackoff("102s"); assertEquals(102, config.getRepairRetryBackoff().toSeconds()); } @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { + public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() + { autoRepairService.config = new AutoRepairConfig(false); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisabled() + { autoRepairService.config = new AutoRepairConfig(true); autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); @@ -97,7 +106,8 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisa } @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { + 
public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() + { autoRepairService.config = new AutoRepairConfig(true); autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); @@ -105,7 +115,8 @@ public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); @@ -113,7 +124,8 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(false); @@ -121,7 +133,8 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled } @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { + public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); @@ -129,7 +142,8 @@ public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCEnabled(true); 
autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index f2ee61930b09..9c2af3e1c793 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -19,9 +19,11 @@ package org.apache.cassandra.service; import com.google.common.collect.ImmutableSet; + import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.repair.autorepair.AutoRepairUtils; + import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -41,7 +43,8 @@ * Unit tests covering different repair types for {@link org.apache.cassandra.service.AutoRepairService} */ @RunWith(Parameterized.class) -public class AutoRepairServiceRepairTypeTest extends CQLTester { +public class AutoRepairServiceRepairTypeTest extends CQLTester +{ @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -51,25 +54,29 @@ public class AutoRepairServiceRepairTypeTest extends CQLTester { private AutoRepairService instance; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() { + public static Collection repairTypes() + { return Arrays.asList(AutoRepairConfig.RepairType.values()); } @BeforeClass - public static void setupClass() throws Exception { + public static void setupClass() throws Exception + { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); setAutoRepairEnabled(true); requireNetwork(); } @Before - public void setUpTest() { + public void setUpTest() + { AutoRepairUtils.setup(); instance = new AutoRepairService(); } @Test - public void testGetOnGoingRepairHostIdsTest() { + public void testGetOnGoingRepairHostIdsTest() + { long now = System.currentTimeMillis(); 
AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); From b9e11a1193f367c230a42339363c33200a5a3154 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 13 Feb 2025 12:05:15 -0800 Subject: [PATCH 142/257] Additional review comments from Jaydeepkumar --- NEWS.txt | 5 +++++ .../config/YamlConfigurationLoaderTest.java | 2 +- .../autorepair/RepairTokenRangeSplitterTest.java | 15 ++++++++------- .../service/ActiveRepairServiceTest.java | 1 - 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/NEWS.txt b/NEWS.txt index b35a3c02745b..5cb99edcab73 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -72,6 +72,11 @@ using the provided 'sstableupgrade' tool. New features ------------ [The following is a placeholder, to be revised asap] + - CEP-37 Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This + significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit + and manage repairs. See + https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution for more + details on the motivation and design - CEP-21 Transactional Cluster Metadata introduces a distributed log for linearizing modifications to cluster metadata. In the first instance, this encompasses cluster membership, token ownership and schema metadata. 
See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-21%3A+Transactional+Cluster+Metadata for more detail on diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index ace51ebe2ce8..68d5302047be 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -302,7 +302,7 @@ public void fromMapTest() assertEquals(true, config.client_encryption_options.enabled); // Check a nested object assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) - assertEquals(true, config.auto_repair.enabled); + assertTrue(config.auto_repair.enabled); assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.INCREMENTAL)); config.auto_repair.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 7da664e13c91..deea6a33f1a3 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -126,8 +126,8 @@ public void testSizePartitionCountSplit() throws Throwable Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); - try(Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); - Refs sstables2 = 
RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) + try (Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) { SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); @@ -330,23 +330,23 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte FilteredRepairAssignments filteredRepairAssignments = repairRangeSplitter.filterRepairAssignments(0, KEYSPACE, assignments, 0); List finalRepairAssignments = filteredRepairAssignments.repairAssignments; assertEquals(2, finalRepairAssignments.size()); - assertEquals(expectedBytes*2, filteredRepairAssignments.newBytesSoFar); + assertEquals(expectedBytes * 2, filteredRepairAssignments.newBytesSoFar); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowUnknownParameter() { repairRangeSplitter.setParameter("unknown", "x"); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowSettingBytesPerAssignmentGreaterThanMaxBytesPerSchedule() { repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "500GiB"); repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "600GiB"); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowSettingMaxBytesPerScheduleLessThanBytesPerAssignment() { repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "100MiB"); @@ -386,7 +386,8 @@ private void insertAndFlushSingleTable() 
throws Throwable private List createAndInsertTables(int count) throws Throwable { List tableNames = new ArrayList<>(); - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) + { String tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); tableNames.add(tableName); insertAndFlushTable(tableName); diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index cb90cc725295..6ccfbcbfa971 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -247,7 +247,6 @@ public void testGetNeighborsSpecifiedHostsWithNoLocalHost() throws Throwable ActiveRepairService.instance().getNeighbors(KEYSPACE5, ranges, ranges.iterator().next(), null, hosts); } - @Test public void testParentRepairStatus() throws Throwable { From 9a73fc5e360ec9e1eb5f2d11ed0c4c326323000e Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 14 Feb 2025 12:37:37 -0800 Subject: [PATCH 143/257] Incorporate NEWS.txt review comment --- NEWS.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.txt b/NEWS.txt index 5cb99edcab73..2026fb09a762 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -76,7 +76,7 @@ New features significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and manage repairs. See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution for more - details on the motivation and design + details on the motivation and design. - CEP-21 Transactional Cluster Metadata introduces a distributed log for linearizing modifications to cluster metadata. In the first instance, this encompasses cluster membership, token ownership and schema metadata. 
See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-21%3A+Transactional+Cluster+Metadata for more detail on From efa146e19f7ea355840c0e6969a3609335accab7 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 20 Feb 2025 16:21:10 -0800 Subject: [PATCH 144/257] A new table property should be wrapped inside a config --- pylib/cqlshlib/cql3handling.py | 3 + pylib/cqlshlib/test/test_cqlsh_completion.py | 6 +- .../repair/autorepair/AutoRepairConfig.java | 6 ++ .../cassandra/schema/SchemaKeyspace.java | 19 ++++- .../AutoRepairTablePropertyTest.java | 84 +++++++++++++++++++ 5 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index f1baf2ad2952..d2e8e0de63c7 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -56,6 +56,7 @@ class Cql3ParsingRuleSet(CqlParsingRuleSet): ('memtable_flush_period_in_ms', None), ('cdc', None), ('read_repair', None), + ('auto_repair', None), ) columnfamily_layout_map_options = ( @@ -559,6 +560,8 @@ def cf_prop_val_completer(ctxt, cass): return [Hint('')] if this_opt == 'incremental_backups': return [Hint('')] + if this_opt == 'auto_repair': + return ["{'full_enabled': '"] return [Hint('')] diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index bf2385a46fef..90cddf35bce1 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -678,7 +678,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) self.trycompletions(prefix + ' new_table (col_a 
int PRIMARY KEY) WITH ', choices=['allow_auto_snapshot', 'bloom_filter_fp_chance', 'compaction', @@ -690,7 +690,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance ', immediate='= ') self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance = ', @@ -740,7 +740,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) self.trycompletions(prefix + " new_table (col_a int PRIMARY KEY) WITH compaction = " + "{'class': 'TimeWindowCompactionStrategy', '", choices=['compaction_window_unit', 'compaction_window_size', diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 069a7378fa16..ae3a70476993 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -147,6 +147,12 @@ public boolean isAutoRepairSchedulingEnabled() return enabled; } + @VisibleForTesting + public void setAutoRepairSchedulingEnabled(boolean enabled) + { + this.enabled = enabled; + } + public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferInterval() { return history_clear_delete_hosts_buffer_interval; diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java 
b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 21fad889d108..7781ef29b9e6 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -595,8 +595,7 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compaction", params.compaction.asMap()) .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) - .add("extensions", params.extensions) - .add("auto_repair", params.autoRepair.asMap()); + .add("extensions", params.extensions); // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ // node sends table schema to a < 3.8 versioned node with an unknown column. @@ -620,6 +619,13 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui if (DatabaseDescriptor.getAccordTransactionsEnabled() && !forView) builder.add("fast_path", params.fastPath.asMap()); + + // As above, only add the auto_repair column if the scheduler is enabled + // to avoid RTE in pre-5.1 versioned node during upgrades + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + builder.add("auto_repair", params.autoRepair.asMap()); + } } private static void addAlterTableToSchemaMutation(TableMetadata oldTable, TableMetadata newTable, Mutation.SimpleBuilder builder) @@ -1085,8 +1091,7 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) SpeculativeRetryPolicy.fromString("99PERCENTILE")) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) - .fastPath(getFastPathStrategy(row)) - .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); + .fastPath(getFastPathStrategy(row)); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) @@ -1096,6 +1101,12 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) 
if (row.has("incremental_backups")) builder.incrementalBackups(row.getBoolean("incremental_backups")); + // auto_repair column was introduced in 5.1 + if (row.has("auto_repair")) + { + builder.automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); + } + return builder.build(); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java new file mode 100644 index 000000000000..51551e46369d --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Map; + +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.SimpleBuilders; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaKeyspace; +import org.apache.cassandra.schema.SchemaKeyspaceTables; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +/** + * Unit tests that verifies "auto_repair" is not included in Schema mutation + * {@link org.apache.cassandra.schema.SchemaKeyspace} if AutoRepair is disabled + */ +public class AutoRepairTablePropertyTest extends CQLTester +{ + @Test + public void testSchedulerDisabledNoColumnReturned() + { + helperTestTableProperty(false); + } + + @Test + public void testSchedulerEnabledShouldReturnColumnReturned() + { + helperTestTableProperty(true); + } + + public void helperTestTableProperty(boolean autoRepairOn) + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(autoRepairOn); + + Map systemSchemaTables = Map.of(SchemaKeyspaceTables.TABLES, "table_name", SchemaKeyspaceTables.VIEWS, "view_name"); + for (Map.Entry systemSchema : systemSchemaTables.entrySet()) + { + ColumnFamilyStore tables = Keyspace.open(SchemaConstants.SCHEMA_KEYSPACE_NAME).getColumnFamilyStore(systemSchema.getKey()); + SimpleBuilders.RowBuilder builder = new SimpleBuilders.RowBuilder(tables.metadata(), systemSchema.getValue()); + SchemaKeyspace.addTableParamsToRowBuilder(tables.metadata().params, builder); + Row row = builder.build(); + ColumnMetadata autoRepair = 
tables.metadata().getColumn(ByteBufferUtil.bytes("auto_repair")); + ColumnData data = row.getCell(autoRepair); + if (autoRepairOn) + { + assertNotNull(data); + } + else + { + // if AutoRepair is not enabled, the column should not be returned + // as part of the system_schema.tables mutation + assertNull(data); + } + } + } +} From cf291f8d6298706f5e421e53381a82f1f71a2c42 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 21 Feb 2025 08:32:38 -0800 Subject: [PATCH 145/257] Skip displaying the table property if AutoRepair is disabled --- src/java/org/apache/cassandra/schema/TableParams.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index ffdfcccedf2d..c56ddd4ab41f 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -28,6 +28,7 @@ import com.google.common.collect.Maps; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.Attributes; import org.apache.cassandra.cql3.CqlBuilder; import org.apache.cassandra.exceptions.ConfigurationException; @@ -419,9 +420,12 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) .newLine(); } - builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) - .newLine() - .append("AND auto_repair = ").append(autoRepair.asMap()); + builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + builder.newLine() + .append("AND auto_repair = ").append(autoRepair.asMap()); + } } public static final class Builder From c41c7d6df89639efa69c2e6c362b3577850edcc8 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 21 Feb 2025 09:46:59 -0800 Subject: [PATCH 
146/257] Fix broken tests --- pylib/cqlshlib/cql3handling.py | 3 --- pylib/cqlshlib/test/test_cqlsh_completion.py | 6 +++--- pylib/cqlshlib/test/test_cqlsh_output.py | 3 +-- .../cassandra/cql3/statements/DescribeStatementTest.java | 5 ++--- test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java | 3 +-- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index d2e8e0de63c7..f1baf2ad2952 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -56,7 +56,6 @@ class Cql3ParsingRuleSet(CqlParsingRuleSet): ('memtable_flush_period_in_ms', None), ('cdc', None), ('read_repair', None), - ('auto_repair', None), ) columnfamily_layout_map_options = ( @@ -560,8 +559,6 @@ def cf_prop_val_completer(ctxt, cass): return [Hint('')] if this_opt == 'incremental_backups': return [Hint('')] - if this_opt == 'auto_repair': - return ["{'full_enabled': '"] return [Hint('')] diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index 90cddf35bce1..bf2385a46fef 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -678,7 +678,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH ', choices=['allow_auto_snapshot', 'bloom_filter_fp_chance', 'compaction', @@ -690,7 +690,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) + 'min_index_interval', 
'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance ', immediate='= ') self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance = ', @@ -740,7 +740,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) self.trycompletions(prefix + " new_table (col_a int PRIMARY KEY) WITH compaction = " + "{'class': 'TimeWindowCompactionStrategy', '", choices=['compaction_window_unit', 'compaction_window_size', diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py index ec400cf7d735..c32690b42496 100644 --- a/pylib/cqlshlib/test/test_cqlsh_output.py +++ b/pylib/cqlshlib/test/test_cqlsh_output.py @@ -701,8 +701,7 @@ def test_describe_columnfamily_output(self): AND read_repair = 'BLOCKING' AND transactional_mode = 'off' AND transactional_migration_from = 'none' - AND speculative_retry = '99p' - AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};""" % quote_name(get_keyspace())) + AND speculative_retry = '99p';""" % quote_name(get_keyspace())) with cqlsh_testrun(tty=True, env=self.default_env) as c: for cmdword in ('describe table', 'desc columnfamily'): diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 9f27ee04a7fe..f96a0bbc35f2 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1144,7 +1144,7 @@ private 
static String tableParametersCql() " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = '99p'\n" + - " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + " AND speculative_retry = '99p';"; } private static String cqlQuoted(Map map) @@ -1171,8 +1171,7 @@ private static String mvParametersCql() " AND memtable_flush_period_in_ms = 0\n" + " AND min_index_interval = 128\n" + " AND read_repair = 'BLOCKING'\n" + - " AND speculative_retry = '99p'\n" + - " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + " AND speculative_retry = '99p';"; } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index a328790cce9d..da5f6d1853cb 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -349,8 +349,7 @@ public void testCfmOptionsCQL() " AND read_repair = 'BLOCKING'\n" + " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = 'ALWAYS'\n" + - " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};" + " AND speculative_retry = 'ALWAYS';" )); } From ef0d0ad10c041e383643ef71605e6004edb53a34 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 21 Feb 2025 14:26:45 -0800 Subject: [PATCH 147/257] Extend DescribeStatementTest to cover properties when AutoRepair scheduler is enabled --- .../statements/DescribeStatementTest.java | 149 +++++++++++++----- 1 file changed, 107 insertions(+), 42 deletions(-) diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java 
b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index f96a0bbc35f2..a7cbf41a99fd 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -270,6 +270,19 @@ public void testDescribeVirtualTables() throws Throwable @Test public void testDescribe() throws Throwable + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(false); + helperTestDescribe(); + } + + @Test + public void testDescribeWithAutoRepair() throws Throwable + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(true); + helperTestDescribe(); + } + + public void helperTestDescribe() throws Throwable { try { @@ -833,7 +846,7 @@ public void testDescribeCreateLikeTable() throws Throwable requireNetwork(); DatabaseDescriptor.setDynamicDataMaskingEnabled(true); String souceTable = createTable(KEYSPACE_PER_TEST, - "CREATE TABLE %s (" + + "CREATE TABLE %s (" + " pk1 text, " + " pk2 int MASKED WITH DEFAULT, " + " ck1 int, " + @@ -1122,29 +1135,56 @@ private static String testTableOutput() private static String tableParametersCql() { - return "additional_write_policy = '99p'\n" + - " AND allow_auto_snapshot = true\n" + - " AND bloom_filter_fp_chance = 0.01\n" + - " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + - " AND cdc = false\n" + - " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + - " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + - " AND memtable = 'default'\n" + - " AND crc_check_chance = 1.0\n" + - " AND fast_path = 'keyspace'\n" + - " AND default_time_to_live = 0\n" + - " AND extensions = {}\n" + - " AND gc_grace_seconds = 864000\n" + - " AND incremental_backups = true\n" + - " AND max_index_interval = 2048\n" + - " AND memtable_flush_period_in_ms = 0\n" + - " AND min_index_interval = 
128\n" + - " AND read_repair = 'BLOCKING'\n" + - " AND transactional_mode = 'off'\n" + - " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = '99p'\n" + - " AND speculative_retry = '99p';"; + if (!DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND fast_path = 'keyspace'\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + + " AND speculative_retry = '99p';"; + } + else + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " 
AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + + " AND speculative_retry = '99p'\n" + + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + } } private static String cqlQuoted(Map map) @@ -1154,24 +1194,49 @@ private static String cqlQuoted(Map map) private static String mvParametersCql() { - return "additional_write_policy = '99p'\n" + - " AND allow_auto_snapshot = true\n" + - " AND bloom_filter_fp_chance = 0.01\n" + - " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + - " AND cdc = false\n" + - " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + - " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + - " AND memtable = 'default'\n" + - " AND crc_check_chance = 1.0\n" + - " AND extensions = {}\n" + - " AND gc_grace_seconds = 864000\n" + - " AND incremental_backups = true\n" + - " AND max_index_interval = 2048\n" + - " AND memtable_flush_period_in_ms = 0\n" + - " AND min_index_interval = 128\n" + - " AND read_repair = 'BLOCKING'\n" + - " AND speculative_retry = '99p';"; + if (!DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND extensions = {}\n" + + " AND 
gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p';"; + } + else + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p'\n" + + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + } } private static String keyspaceOutput() From 4f2af32cefb9b8915802b3829a4326ac82635ec9 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Thu, 30 Jan 2025 11:41:56 -0800 Subject: [PATCH 148/257] Fix race condition in auto-repair scheduler --- .../repair/autorepair/AutoRepair.java | 78 ++++++- .../repair/autorepair/AutoRepairState.java | 63 +----- .../AutoRepairParameterizedTest.java | 206 ++++++++++++++---- .../autorepair/AutoRepairStateTest.java | 101 +-------- 4 files changed, 240 insertions(+), 208 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index bd656a46340d..ed28bfcf1afb 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -18,6 +18,7 @@ package org.apache.cassandra.repair.autorepair; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.EnumMap; import java.util.HashSet; @@ -56,12 +57,17 @@ import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; /** * AutoRepair scheduler responsible for running different types of repairs. @@ -69,12 +75,11 @@ public class AutoRepair { private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); + private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @VisibleForTesting protected static Supplier timeFunc = Clock.Global::currentTimeMillis; - public static AutoRepair instance = new AutoRepair(); - // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra nodes with tiny amount of data. 
public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); @@ -95,6 +100,7 @@ public class AutoRepair protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; private boolean isSetupDone = false; + public static AutoRepair instance = new AutoRepair(); @VisibleForTesting protected AutoRepair() @@ -304,26 +310,29 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim if ((totalProcessedAssignments % config.getRepairThreads(repairType) == 0) || (totalProcessedAssignments == totalRepairAssignments)) { + boolean success = false; int retryCount = 0; Future f = null; while (retryCount <= config.getRepairMaxRetries()) { RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - Lists.newArrayList(curRepairAssignment.getTableNames()), - ranges, primaryRangeOnly); - repairState.resetWaitCondition(); + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); + RepairProgressListener listener = new RepairProgressListener(repairType); + task.addProgressListener(listener); f = repairRunnableExecutors.get(repairType).submit(task); try { long jobStartTime = timeFunc.get(); - repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + listener.await(); + success = listener.isSuccess(); soakAfterRepair(jobStartTime, config.getRepairTaskMinDuration().toMilliseconds()); } catch (InterruptedException e) { logger.error("Exception in cond await:", e); } - if (repairState.isSuccess()) + if (success) { break; } @@ -339,7 +348,7 @@ else if (retryCount < config.getRepairMaxRetries()) retryCount++; } //check repair status - if (repairState.isSuccess()) + if (success) { logger.info("Repair completed for range {}-{} for {} tables {}, total assignments: {}," + "processed assignments: {}", tokenRange.left, tokenRange.right, @@ -500,4 +509,57 @@ static class CollectedRepairStats int skippedTokenRanges = 0; int skippedTables = 
0; } + + @VisibleForTesting + protected static class RepairProgressListener implements ProgressListener + { + private final AutoRepairConfig.RepairType repairType; + @VisibleForTesting + protected boolean success; + @VisibleForTesting + protected final Condition condition = newOneTimeCondition(); + + public RepairProgressListener(AutoRepairConfig.RepairType repairType) + { + this.repairType = repairType; + } + + public void await() throws InterruptedException + { + //if for some reason we don't hear back on repair progress for sometime + if (!condition.await(12, TimeUnit.HOURS)) + { + success = false; + } + } + + public boolean isSuccess() + { + return success; + } + + @Override + public void progress(String tag, ProgressEvent event) + { + ProgressEventType type = event.getType(); + String message = String.format("[%s] %s", format.format(System.currentTimeMillis()), event.getMessage()); + if (type == ProgressEventType.ERROR) + { + logger.error("Repair failure for repair {}: {}", repairType.toString(), message); + success = false; + condition.signalAll(); + } + if (type == ProgressEventType.PROGRESS) + { + message = message + " (progress: " + (int) event.getProgressPercentage() + "%)"; + logger.debug("Repair progress for repair {}: {}", repairType.toString(), message); + } + if (type == ProgressEventType.COMPLETE) + { + logger.debug("Repair completed for repair {}: {}", repairType.toString(), message); + success = true; + condition.signalAll(); + } + } + } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 3f987f90c652..fda1a2470d50 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -20,7 +20,6 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.db.ColumnFamilyStore; 
import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.view.TableViews; @@ -37,10 +36,6 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.concurrent.Condition; -import org.apache.cassandra.utils.progress.ProgressEvent; -import org.apache.cassandra.utils.progress.ProgressEventType; -import org.apache.cassandra.utils.progress.ProgressListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,13 +47,10 @@ import java.util.function.Supplier; import java.util.stream.Collectors; -import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; -import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; - /** * AutoRepairState represents the state of automated repair for a given repair type. */ -public abstract class AutoRepairState implements ProgressListener +public abstract class AutoRepairState { protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @@ -93,10 +85,6 @@ public abstract class AutoRepairState implements ProgressListener protected int skippedTablesCount = 0; @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; - @VisibleForTesting - protected Condition condition = newOneTimeCondition(); - @VisibleForTesting - protected boolean success = true; protected final AutoRepairMetrics metrics; protected AutoRepairState(RepairType repairType) @@ -109,43 +97,8 @@ protected AutoRepairState(RepairType repairType) protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption options) { - RepairCoordinator task = new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), - options, keyspace); - task.addProgressListener(this); - - return task; - } - - @Override - public void progress(String 
tag, ProgressEvent event) - { - ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(currentTimeMillis()), event.getMessage()); - if (type == ProgressEventType.ERROR) - { - logger.error("Repair failure for {} repair: {}", repairType.toString(), message); - success = false; - condition.signalAll(); - } - if (type == ProgressEventType.PROGRESS) - { - message = message + " (progress: " + (int) event.getProgressPercentage() + "%)"; - logger.debug("Repair progress for {} repair: {}", repairType.toString(), message); - } - if (type == ProgressEventType.COMPLETE) - { - success = true; - condition.signalAll(); - } - } - - public void waitForRepairToComplete(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException - { - //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(repairSessionTimeout.toSeconds(), TimeUnit.SECONDS)) - { - success = false; - } + return new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), + options, keyspace); } public long getLastRepairTime() @@ -272,11 +225,6 @@ public int getSkippedTablesCount() return skippedTablesCount; } - public boolean isSuccess() - { - return success; - } - public void recordTurn(AutoRepairUtils.RepairTurn turn) { metrics.recordTurn(turn); @@ -291,11 +239,6 @@ public int getTotalDisabledTablesRepairCount() { return totalDisabledTablesRepairCount; } - - public void resetWaitCondition() - { - condition = newOneTimeCondition(); - } } class PreviewRepairedState extends AutoRepairState diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index cca5513e3712..a6ed14ff77ea 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ 
-24,8 +24,10 @@ import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import com.google.common.collect.Sets; @@ -34,6 +36,8 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.LocalStrategy; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; @@ -57,10 +61,15 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; @@ -74,6 +83,7 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -246,7 +256,6 @@ public void testRepairAsync() AutoRepair.instance.repairAsync(repairType); verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); - } @Test @@ 
-502,6 +511,10 @@ public void testMetrics() AutoRepair.instance.repairStates.put(repairType, autoRepairState); when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); when(autoRepairState.getFailedTokenRangesCount()).thenReturn(10); when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); @@ -514,29 +527,29 @@ public void testMetrics() } @Test - public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws Exception + public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMaterializedViewRepairEnabled(repairType, false); - config.setRepairRetryBackoff("0s"); - when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) - .thenReturn(repairRunnable); AutoRepair.instance.repairStates.put(repairType, autoRepairState); when(autoRepairState.getLastRepairTime()).thenReturn((long) 0); - AtomicInteger resetWaitConditionCalls = new AtomicInteger(); - AtomicInteger waitForRepairCompletedCalls = new AtomicInteger(); + AtomicInteger getRepairRunnableCalls = new AtomicInteger(); + AtomicReference prevListener = new AtomicReference<>(); doAnswer(invocation -> { - resetWaitConditionCalls.getAndIncrement(); - assertEquals("waitForRepairToComplete was called before resetWaitCondition", - resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get() + 1); - return null; - }).when(autoRepairState).resetWaitCondition(); + if (getRepairRunnableCalls.getAndIncrement() > 0) + { + // progress listener from previous repair should be signalled before 
starting new repair + assertTrue(prevListener.get().condition.isSignalled()); + } + getRepairRunnableCalls.incrementAndGet(); + return repairRunnable; + }).when(autoRepairState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); doAnswer(invocation -> { - waitForRepairCompletedCalls.getAndIncrement(); - assertEquals("resetWaitCondition was not called before waitForRepairToComplete", - resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get()); + // sending out a COMPLETE event with a 10ms delay + Executors.newScheduledThreadPool(1).schedule(() -> { + invocation.getArgument(0, AutoRepair.RepairProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + }, 10, TimeUnit.MILLISECONDS); return null; - }).when(autoRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepair.instance.repair(repairType); AutoRepair.instance.repair(repairType); @@ -632,7 +645,10 @@ public void testRepairTakesLastRepairTimeFromDB() public void testRepairMaxRetries() { when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(false); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { @@ -646,10 +662,10 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); // Expect configured retries for each table expected to be repaired - assertEquals(config.getRepairMaxRetries() * expectedRepairAssignments, sleepCalls.get()); - verify(autoRepairState, 
Mockito.times(1)).setSucceededTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(expectedRepairAssignments); + assertEquals(config.getRepairMaxRetries()*expectedRepairAssignments, sleepCalls.get()); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(expectedRepairAssignments); } @Test @@ -664,20 +680,27 @@ public void testRepairSuccessAfterRetry() assertEquals(TimeUnit.SECONDS, unit); assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); }; - when(autoRepairState.isSuccess()).then((invocationOnMock) -> { - if (sleepCalls.get() == 0) { - return false; + doAnswer(invocation -> { + if (sleepCalls.get() == 0) + { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + } + else + { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); } - return true; - }); + + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); config.setRepairMinInterval(repairType, "0s"); + config.setRepairMaxRetries(1); AutoRepair.instance.repairStates.put(repairType, autoRepairState); AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); } @Test @@ -755,8 +778,11 @@ public void 
testRepairThrowsForIRWithCDCReplay() @Test public void testSoakAfterImmediateRepair() { - when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(true); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.repair_task_min_duration = new DurationSpec.LongSecondsBound("10s"); AtomicInteger sleepCalls = new AtomicInteger(); @@ -772,16 +798,19 @@ public void testSoakAfterImmediateRepair() AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); } @Test public void testNoSoakAfterRepair() { - when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(true); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepairConfig 
config = AutoRepairService.instance.getAutoRepairConfig(); config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { @@ -792,8 +821,105 @@ public void testNoSoakAfterRepair() AutoRepair.instance.repair(repairType); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); + } + + @Test + public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() + { + RepairOption options = new RepairOption(RepairParallelism.PARALLEL, true, repairType == AutoRepairConfig.RepairType.INCREMENTAL, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Set.of(), + false, false, PreviewKind.NONE, false, true, false, false, false, false); + AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); + AutoRepairState spyState = spy(repairState); + AtomicReference failingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); + AtomicReference failingListener = new AtomicReference<>(); + AtomicReference succeedingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); + AtomicInteger repairRunableCalls = new AtomicInteger(); + doAnswer((InvocationOnMock inv ) -> { + RepairCoordinator runnable = spy(repairState.getRepairRunnable(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2), + inv.getArgument(3))); + if 
(repairRunableCalls.getAndIncrement() == 0) + { + // this will be used for first repair job + doAnswer(invocation -> { + // repair runnable for the first repair job will immediately fail + failingListener.set(invocation.getArgument(0, AutoRepair.RepairProgressListener.class)); + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(runnable).addProgressListener(Mockito.any()); + failingRepair.set(runnable); + } + else + { + // this will be used for subsequent repair jobs + doAnswer(invocation -> { + if (repairRunableCalls.get() > 0) + { + // repair runnable for the subsequent repair jobs will immediately complete + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + + } + // repair runnable for the first repair job will continue firing ERROR events + failingListener.get().progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(runnable).addProgressListener(Mockito.any()); + succeedingRepair.set(runnable); + } + return runnable; + }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); + when(spyState.getLastRepairTime()).thenReturn((long) 0); + AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(0); + AutoRepair.instance.repairStates.put(repairType, spyState); + + AutoRepair.instance.repair(repairType); + + assertEquals(1, (int) AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue()); + // only the first repair job should have failed despite it continuously firing ERROR events + verify(spyState, times(1)).setFailedTokenRangesCount(1); + } + + @Test + public void testProgressError() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0, "test")); + + 
assertFalse(listener.success); + assertTrue(listener.condition.isSignalled()); + } + + @Test + public void testProgressProgress() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.PROGRESS, 0, 0, "test")); + + assertFalse(listener.success); + assertFalse(listener.condition.isSignalled()); + } + + @Test + public void testProgresComplete() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); + + assertTrue(listener.success); + assertTrue(listener.condition.isSignalled()); + } + + @Test + public void testAwait() throws Exception + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); + + listener.await(); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index b4bb20c7eafd..13ca3d62d676 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -21,8 +21,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.TimeUnit; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -36,17 +34,13 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; import org.apache.cassandra.service.AutoRepairService; -import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.progress.ProgressEvent; -import 
org.apache.cassandra.utils.progress.ProgressEventType; import org.mockito.Mock; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; /** @@ -89,62 +83,7 @@ public void testGetRepairRunnable() } @Test - public void testProgressError() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); - - state.progress("test", progressEvent); - - assertFalse(state.success); - assertTrue(state.condition.await(0, TimeUnit.MILLISECONDS)); - } - - @Test - public void testProgress_progress() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.PROGRESS); - - state.progress("test", progressEvent); - - assertTrue(state.success); - assertFalse(state.condition.await(0, TimeUnit.MILLISECONDS)); - } - - - @Test - public void testProgress_complete() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.COMPLETE); - - state.progress("test", progressEvent); - - assertTrue(state.success); - assertTrue(state.condition.await(1, TimeUnit.MILLISECONDS)); - } - - @Test - public void testWaitForRepairToComplete() throws Exception - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.condition.signalAll(); - Condition finishedCondition = Condition.newOneTimeCondition(); - Callable waitForRepairToComplete = () -> { - state.waitForRepairToComplete(new DurationSpec.IntSecondsBound("12h")); - finishedCondition.signalAll(); - return null; - }; - - waitForRepairToComplete.call(); - - 
assertTrue(finishedCondition.await(1, TimeUnit.MILLISECONDS)); - } - - @Test - public void testGetLastRepairTime() - { + public void testGetLastRepairTime() { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.lastRepairTimeInMs = 1; @@ -372,42 +311,4 @@ public void testGetFailedTokenRangesCount() assertEquals(1, state.getFailedTokenRangesCount()); } - - @Test - public void isSuccess() - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.success = true; - - assertTrue(state.isSuccess()); - - state.success = false; - - assertFalse(state.isSuccess()); - } - - @Test - public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesError() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); - - state.progress("test", progressEvent); - assertFalse(state.success); - - state.waitForRepairToComplete(new DurationSpec.IntSecondsBound("12h")); - assertFalse(state.success); - } - - @Test - public void testResetWaitCondition() - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.condition.signalAll(); - assertTrue(state.condition.isSignalled()); - - state.resetWaitCondition(); - - assertFalse(state.condition.isSignalled()); - } } From 75f4471ec3485004cadc6f1b5ceb44e19ac3b28c Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Mon, 24 Feb 2025 13:46:54 -0800 Subject: [PATCH 149/257] Address comments --- .../repair/autorepair/AutoRepairParameterizedTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a6ed14ff77ea..ca5f0d50577c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -830,8 +831,8 @@ public void testNoSoakAfterRepair() public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() { RepairOption options = new RepairOption(RepairParallelism.PARALLEL, true, repairType == AutoRepairConfig.RepairType.INCREMENTAL, false, - AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Set.of(), - false, false, PreviewKind.NONE, false, true, false, false, false, false); + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Collections.emptySet(), + false, false, PreviewKind.NONE, false, true, false, false, false, false); AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); AutoRepairState spyState = spy(repairState); AtomicReference failingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); From 3f6b987532e7764dc81bc240adcdbd0b8cf985ed Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Mon, 24 Feb 2025 13:52:21 -0800 Subject: [PATCH 150/257] Address comments --- .../apache/cassandra/repair/autorepair/AutoRepair.java | 8 ++++---- .../repair/autorepair/AutoRepairParameterizedTest.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index ed28bfcf1afb..6a92bbe28fe1 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -324,7 +324,7 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim try { long jobStartTime = 
timeFunc.get(); - listener.await(); + listener.await(config.getRepairSessionTimeout(repairType)); success = listener.isSuccess(); soakAfterRepair(jobStartTime, config.getRepairTaskMinDuration().toMilliseconds()); } @@ -524,10 +524,10 @@ public RepairProgressListener(AutoRepairConfig.RepairType repairType) this.repairType = repairType; } - public void await() throws InterruptedException + public void await(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException { //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(12, TimeUnit.HOURS)) + if (!condition.await(repairSessionTimeout.to(TimeUnit.SECONDS), TimeUnit.SECONDS)) { success = false; } @@ -542,7 +542,7 @@ public boolean isSuccess() public void progress(String tag, ProgressEvent event) { ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(System.currentTimeMillis()), event.getMessage()); + String message = String.format("[%s] %s", format.format(Clock.Global.currentTimeMillis()), event.getMessage()); if (type == ProgressEventType.ERROR) { logger.error("Repair failure for repair {}: {}", repairType.toString(), message); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index ca5f0d50577c..31541f589792 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -921,6 +921,6 @@ public void testAwait() throws Exception AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); - listener.await(); + listener.await(new DurationSpec.IntSecondsBound("12h")); } } From 90f40b3e835f25cf85a1e2a4b74cbb3baa34b68f Mon Sep 17 00:00:00 2001 From: 
Kristijonas Zalys Date: Mon, 24 Feb 2025 15:20:30 -0800 Subject: [PATCH 151/257] Fix formatting --- .../org/apache/cassandra/repair/autorepair/AutoRepair.java | 2 +- .../cassandra/repair/autorepair/AutoRepairStateTest.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 6a92bbe28fe1..6d392470f210 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -527,7 +527,7 @@ public RepairProgressListener(AutoRepairConfig.RepairType repairType) public void await(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException { //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(repairSessionTimeout.to(TimeUnit.SECONDS), TimeUnit.SECONDS)) + if (!condition.await(repairSessionTimeout.toSeconds(), TimeUnit.SECONDS)) { success = false; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 13ca3d62d676..fc4ac2f03c79 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -83,7 +83,8 @@ public void testGetRepairRunnable() } @Test - public void testGetLastRepairTime() { + public void testGetLastRepairTime() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.lastRepairTimeInMs = 1; From 2ba3cdd9ff10365ad8c7aec08a55c840421db1cd Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 3 Mar 2025 09:26:27 -0600 Subject: [PATCH 152/257] Account for vnodes with repair_by_keyspace in RepairTokenRangeSplitter Updates RepairTokenRangeSplitter to call getRepairAssignmentsForKeyspace for each 
individual token range so repair_by_keyspace grouping works correctly. Patch by Andy Tolbert; Reviewed by Jaydeepkumar Chovatia for CASSANDRA-20392 --- .../autorepair/RepairTokenRangeSplitter.java | 46 ++++---- .../AutoRepairParameterizedTest.java | 48 ++++----- .../RepairTokenRangeSplitterTest.java | 102 +++++++++++++----- 3 files changed, 124 insertions(+), 72 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index b6dcfc61b4ae..a76cf3301d03 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -121,7 +121,7 @@ *
      • * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to - * reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if + * reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up * the min_repair_interval. *
      • @@ -294,8 +294,16 @@ protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repair return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), Collections.emptyList()); } - Collection> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); - List repairAssignments = getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRanges); + List> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); + // shuffle token ranges to unbias selection of ranges + Collections.shuffle(tokenRanges); + List repairAssignments = new ArrayList<>(); + // Generate assignments for each range speparately + for (Range tokenRange : tokenRanges) + { + repairAssignments.addAll(getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRange)); + } + FilteredRepairAssignments filteredRepairAssignments = filterRepairAssignments(priority, repairPlan.getKeyspaceName(), repairAssignments, bytesSoFar); bytesSoFar = filteredRepairAssignments.newBytesSoFar; return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), filteredRepairAssignments.repairAssignments); @@ -303,7 +311,7 @@ protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repair } @VisibleForTesting - List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) + List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Range tokenRange) { List repairAssignments = new ArrayList<>(); // this is used for batching minimal single assignment tables together @@ -313,9 +321,10 @@ List getRepairAssignmentsForKeyspace(AutoRepairConfig.Rep // If we can repair by keyspace, sort the tables by size so can batch the smallest ones together boolean repairByKeyspace = 
config.getRepairByKeyspace(repairType); + List tablesToProcess = tableNames; if (repairByKeyspace) { - tableNames.sort((t1, t2) -> { + tablesToProcess = tableNames.stream().sorted((t1, t2) -> { ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); // If for whatever reason the CFS is not retrievable, we can assume it has been deleted, so give the @@ -331,12 +340,12 @@ else if (cfs2 == null) return 1; } return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); - }); + }).toList(); } - for (String tableName : tableNames) + for (String tableName : tablesToProcess) { - List tableAssignments = getRepairAssignmentsForTable(keyspaceName, tableName, tokenRanges); + List tableAssignments = getRepairAssignmentsForTable(keyspaceName, tableName, tokenRange); if (tableAssignments.isEmpty()) continue; @@ -519,9 +528,9 @@ static SizedRepairAssignment merge(List assignments) } @VisibleForTesting - protected List getRepairAssignmentsForTable(String keyspaceName, String tableName, Collection> tokenRanges) + protected List getRepairAssignmentsForTable(String keyspaceName, String tableName, Range tokenRange) { - List sizeEstimates = getRangeSizeEstimate(keyspaceName, tableName, tokenRanges); + List sizeEstimates = getRangeSizeEstimate(keyspaceName, tableName, tokenRange); return getRepairAssignments(sizeEstimates); } @@ -632,7 +641,7 @@ private int calculateNumberOfSplits(SizeEstimate estimate) return splits; } - private Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) + private List> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) { // Collect all applicable token ranges Collection> wrappedRanges; @@ -654,18 +663,15 @@ private Collection> getTokenRanges(boolean primaryRangeOnly, String return ranges; } - private List getRangeSizeEstimate(String keyspace, String table, Collection> tokenRanges) + private 
List getRangeSizeEstimate(String keyspace, String table, Range tokenRange) { List sizeEstimates = new ArrayList<>(); - for (Range tokenRange : tokenRanges) + logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); + try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) { - logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); - try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) - { - SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); - logger.debug("Generated size estimate {}", estimate); - sizeEstimates.add(estimate); - } + SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); + logger.debug("Generated size estimate {}", estimate); + sizeEstimates.add(estimate); } return sizeEstimates; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 31541f589792..a5d2c7741950 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -133,10 +133,9 @@ public static void setupClass() throws Exception StorageService.instance.doAutoRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); - // Calculate the expected number of tables to be repaired, this should be all system keyspaces that are - // distributed, plus 1 for the table we created (ks.tbl), excluding the 'mv' materialized view and - // 'tbl_disabled_auto_repair' we created. - int expectedTablesGoingThroughRepair = 0; + // Calculate the expected number of keyspaces to be repaired, this should be all system keyspaces that are + // distributed, plus 1 for the table we created (ks.tbl). 
+ int expectedKeyspacesGoingThroughRepair = 0; for (Keyspace keyspace : Keyspace.all()) { // skip LocalStrategy keyspaces as these aren't repaired. @@ -150,11 +149,10 @@ public static void setupClass() throws Exception continue; } - int expectedTables = keyspace.getName().equals("ks") ? 1 : keyspace.getColumnFamilyStores().size(); - expectedTablesGoingThroughRepair += expectedTables; + expectedKeyspacesGoingThroughRepair += 1; } // Since the splitter will unwrap a full token range, we expect twice as many repairs. - expectedRepairAssignments = expectedTablesGoingThroughRepair * 2; + expectedRepairAssignments = expectedKeyspacesGoingThroughRepair * 2; } @Before @@ -170,7 +168,8 @@ public void setup() DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - MockitoAnnotations.initMocks(this); + //noinspection resource + MockitoAnnotations.openMocks(this); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).disableAutoCompaction(); @@ -263,7 +262,7 @@ public void testRepairAsync() public void testRepairTurn() { UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + Assert.assertNotEquals("Expected my turn for the repair", NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); } @Test @@ -288,8 +287,8 @@ public void testTooFrequentRepairs() AutoRepair.instance.repair(repairType); long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); - Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), - consideredTables, 0); + Assert.assertNotEquals(String.format("Expected 
total repaired tables > 0, actual value %s ", consideredTables), + 0, consideredTables); //if repair was done in last 24 hours then it should not trigger another repair config.setRepairMinInterval(repairType, "24h"); @@ -313,12 +312,12 @@ public void testNonFrequentRepairs() Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", lastRepairTime1), lastRepairTime1 > 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", - AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + Assert.assertNotEquals("Expected my turn for the repair", + NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); AutoRepair.instance.repair(repairType); long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + - "lastRepairTime2 ", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @@ -330,16 +329,15 @@ public void testGetPriorityHosts() AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); - Assert.assertSame(String.format("Priority host count is not same, actual value %d, expected value %d", - AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + Assert.assertEquals(String.format("Priority host count is not same, actual value %d, expected value %d", 
+ AutoRepairUtils.getPriorityHosts(repairType).size(), 0), 0, AutoRepairUtils.getPriorityHosts(repairType).size()); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != - NOT_MY_TURN); + Assert.assertNotEquals("Expected my turn for the repair", NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); AutoRepair.instance.repair(repairType); AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); AutoRepair.instance.repair(repairType); - Assert.assertSame(String.format("Priority host count is not same actual value %d, expected value %d", - AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + Assert.assertEquals(String.format("Priority host count is not same actual value %d, expected value %d", + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), 0, AutoRepairUtils.getPriorityHosts(repairType).size()); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @@ -388,7 +386,7 @@ public void testGetAllMVs() config.setMaterializedViewRepairEnabled(repairType, true); assertTrue(config.getMaterializedViewRepairEnabled(repairType)); - assertEquals(Arrays.asList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + assertEquals(List.of(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); config.setMaterializedViewRepairEnabled(repairType, false); } @@ -571,7 +569,7 @@ public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() AutoRepair.instance.repair(repairType); int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); Assert.assertNotSame(String.format("Expected total 
repaired tables > 0, actual value %s ", consideredTables), - consideredTables, 0); + 0, consideredTables); int disabledTablesRepairCountAfter = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); Assert.assertTrue(String.format("A table %s should be skipped from auto repair, expected value: %d, actual value %d ", TABLE_DISABLED_AUTO_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); @@ -662,7 +660,7 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); - // Expect configured retries for each table expected to be repaired + // Expect configured retries for each keyspace expected to be repaired assertEquals(config.getRepairMaxRetries()*expectedRepairAssignments, sleepCalls.get()); verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); @@ -835,9 +833,7 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() false, false, PreviewKind.NONE, false, true, false, false, false, false); AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); AutoRepairState spyState = spy(repairState); - AtomicReference failingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); AtomicReference failingListener = new AtomicReference<>(); - AtomicReference succeedingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); AtomicInteger repairRunableCalls = new AtomicInteger(); doAnswer((InvocationOnMock inv ) -> { RepairCoordinator runnable = spy(repairState.getRepairRunnable(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2), @@ -851,7 +847,6 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() 
invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); return null; }).when(runnable).addProgressListener(Mockito.any()); - failingRepair.set(runnable); } else { @@ -867,7 +862,6 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() failingListener.get().progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); return null; }).when(runnable).addProgressListener(Mockito.any()); - succeedingRepair.set(runnable); } return runnable; }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index deea6a33f1a3..d5c6114af5a1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -34,11 +34,11 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.config.DataStorageSpec.LongMebibytesBound; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; @@ -55,6 +55,8 @@ import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.BYTES_PER_ASSIGNMENT; import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.MAX_TABLES_PER_ASSIGNMENT; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** @@ -87,6 +89,7 @@ 
public static void setUpClass() @Before public void setUp() { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); @@ -102,7 +105,7 @@ public void setUp() } @Test - public void testSizePartitionCount() throws Throwable + public void testSizePartitionCount() { insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); try (Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE)) @@ -114,7 +117,7 @@ public void testSizePartitionCount() throws Throwable } @Test - public void testSizePartitionCountSplit() throws Throwable + public void testSizePartitionCountSplit() { int partitionCount = 100_000; int[] values = new int[partitionCount]; @@ -143,28 +146,25 @@ public void testSizePartitionCountSplit() throws Throwable public void testGetRepairAssignmentsForTable_NoSSTables() { // Should return 1 assignment if there are no SSTables - Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, FULL_RANGE); assertEquals(1, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_Single() throws Throwable + public void testGetRepairAssignmentsForTable_Single() { - Collection> ranges = Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); insertAndFlushSingleTable(); - List assignments = 
repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, FULL_RANGE); assertEquals(1, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable + public void testGetRepairAssignmentsForTable_BatchingTables() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); // We expect two assignments, one with table1 and table2 batched, and one with table3 assertEquals(2, assignments.size()); @@ -173,13 +173,12 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable } @Test - public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable + public void testGetRepairAssignmentsForTable_BatchSize() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); // We expect one assignment, with two tables batched assertEquals(1, assignments.size()); @@ -187,25 +186,23 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable } @Test - public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable + public void 
testGetRepairAssignmentsForTable_NoBatching() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "1")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); assertEquals(3, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable + public void testGetRepairAssignmentsForTable_AllBatched() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "100")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); assertEquals(1, assignments.size()); } @@ -333,6 +330,62 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte assertEquals(expectedBytes * 2, filteredRepairAssignments.newBytesSoFar); } + @Test + public void testTokenRangesRepairByKeyspace() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); + + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); + + Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); + + // should be only 1 entry for the keyspace. 
+ assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // Should only be two assignments (since single node encompasses the whole range, should get 2 primary ranges) + // to account for the range wrapping the ring. + assertEquals(2, assignments.size()); + + for (RepairAssignment assignment : assignments) + { + assertEquals(AuthKeyspace.TABLE_NAMES.size(), assignment.getTableNames().size()); + } + } + + @Test + public void testTokenRangesRepairByKeyspaceFalse() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, false); + + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); + + Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // Should be two ranges * X system_auth table names assignments + assertEquals(2 * AuthKeyspace.TABLE_NAMES.size(), assignments.size()); + + // each assignment should only include one table. 
+ for (RepairAssignment assignment : assignments) + { + assertEquals(1, assignment.getTableNames().size()); + } + } + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowUnknownParameter() { @@ -377,13 +430,13 @@ private SizeEstimate sizeEstimateByBytes(LongMebibytesBound sizeInRange, LongMeb return new SizeEstimate(RepairType.INCREMENTAL, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); } - private void insertAndFlushSingleTable() throws Throwable + private void insertAndFlushSingleTable() { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); flush(); } - private List createAndInsertTables(int count) throws Throwable + private List createAndInsertTables(int count) { List tableNames = new ArrayList<>(); for (int i = 0; i < count; i++) @@ -395,13 +448,12 @@ private List createAndInsertTables(int count) throws Throwable return tableNames; } - private void insertAndFlushTable(String tableName) throws Throwable + private void insertAndFlushTable(String tableName) { insertAndFlushTable(tableName, 1); - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); } - private void insertAndFlushTable(String tableName, int... vals) throws Throwable + private void insertAndFlushTable(String tableName, int... 
vals) { for (int i : vals) { From 88a75d8e4b2b0724413bb322eeceb332aed675ca Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:25:16 -0600 Subject: [PATCH 153/257] Make work with JDK11 (and JDK8) --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- .../repair/autorepair/AutoRepairParameterizedTest.java | 2 +- .../repair/autorepair/RepairTokenRangeSplitterTest.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index a76cf3301d03..d746d81b71f6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -340,7 +340,7 @@ else if (cfs2 == null) return 1; } return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); - }).toList(); + }).collect(Collectors.toList()); } for (String tableName : tablesToProcess) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a5d2c7741950..faa70e3d451f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -386,7 +386,7 @@ public void testGetAllMVs() config.setMaterializedViewRepairEnabled(repairType, true); assertTrue(config.getMaterializedViewRepairEnabled(repairType)); - assertEquals(List.of(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + assertEquals(Collections.singletonList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); config.setMaterializedViewRepairEnabled(repairType, false); } diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index d5c6114af5a1..f727a3a2c44b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -335,7 +335,7 @@ public void testTokenRangesRepairByKeyspace() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); - final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", new ArrayList<>(AuthKeyspace.TABLE_NAMES)); final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); @@ -363,7 +363,7 @@ public void testTokenRangesRepairByKeyspaceFalse() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, false); - final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", new ArrayList<>(AuthKeyspace.TABLE_NAMES)); final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); From 6af203cd65777751fa5f0185392eb58d2de81377 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 3 Mar 2025 15:47:20 -0800 Subject: [PATCH 154/257] Adjust as per latest trunk as of 03-Mar-2025 From 4366f830abc630755459f2322142fc4e2c825498 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 3 Mar 2025 16:11:46 -0800 Subject: [PATCH 155/257] Type & 
Remove whitespace From 431562336325845e2adfea6e92239110e5abdd4c Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 12:19:57 -0800 Subject: [PATCH 156/257] Ensure the main config obj has been initialized before checking the yaml property --- src/java/org/apache/cassandra/schema/SchemaKeyspace.java | 2 +- src/java/org/apache/cassandra/schema/TableParams.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 7781ef29b9e6..69b43555c42a 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -622,7 +622,7 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui // As above, only add the auto_repair column if the scheduler is enabled // to avoid RTE in pre-5.1 versioned node during upgrades - if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.add("auto_repair", params.autoRepair.asMap()); } diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index c56ddd4ab41f..9c870d2a0b59 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -421,7 +421,7 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) } builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); - if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.newLine() .append("AND auto_repair = ").append(autoRepair.asMap()); From 
0857a9f3b95bb728408ccd145e25b520681fefc4 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 13:59:01 -0800 Subject: [PATCH 157/257] Use the last repair stat to confirm that the repair ran indeed instead of sleep to avoid flaky test --- .../repair/autorepair/AutoRepair.java | 2 +- .../test/repair/AutoRepairSchedulerTest.java | 57 +++++++++++++------ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 6d392470f210..add579851cf0 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -89,7 +89,7 @@ public class AutoRepair protected final Map repairRunnableExecutors; @VisibleForTesting - protected final Map repairStates; + public final Map repairStates; @VisibleForTesting // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index c6fa37f54dd7..a2492858654e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -21,13 +21,13 @@ import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.concurrent.TimeUnit; import java.util.UUID; import com.google.common.collect.ImmutableMap; -import com.google.common.util.concurrent.Uninterruptibles; + import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.schema.SystemDistributedKeyspace; + import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -63,19 +63,19 @@ public static 
void init() throws IOException ImmutableMap.of( "repair_type_overrides", ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s"), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "1", + "parallel_repair_percentage", "0", + "min_repair_interval", "1s"), AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s")))) + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "1", + "parallel_repair_percentage", "0", + "min_repair_interval", "1s")))) .set("auto_repair.enabled", "true") .set("auto_repair.global_settings.repair_by_keyspace", "true") .set("auto_repair.repair_task_min_duration", "0s") @@ -106,9 +106,34 @@ public void testScheduler() throws ParseException throw new RuntimeException(e); } })); - // wait for a couple of minutes for repair to go through on all three nodes - Uninterruptibles.sleepUninterruptibly(2, TimeUnit.MINUTES); + // validate that the repair ran on all nodes + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + while(true) + { + if (AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL).getLastRepairTime() == 0 + || AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL).getLastRepairTime() == 0) + { + try + { + Thread.sleep(5000); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + continue; + } + break; + } + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); validate(AutoRepairConfig.RepairType.FULL.toString()); validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); } From 
63a9aacdab5a527f1c09232bcacafa2324f6d97c Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:29:09 -0800 Subject: [PATCH 158/257] Incorporate the code review comments --- .../cassandra/schema/SchemaKeyspace.java | 3 ++- .../apache/cassandra/schema/TableParams.java | 3 ++- .../test/repair/AutoRepairSchedulerTest.java | 25 ++++++------------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 69b43555c42a..fe98cfa0ec1a 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -622,7 +622,8 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui // As above, only add the auto_repair column if the scheduler is enabled // to avoid RTE in pre-5.1 versioned node during upgrades - if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null + && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.add("auto_repair", params.autoRepair.asMap()); } diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 9c870d2a0b59..2fe01b78470a 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -421,7 +421,8 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) } builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); - if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null + && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.newLine() .append("AND 
auto_repair = ").append(autoRepair.asMap()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index a2492858654e..9f0690123cf7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -22,9 +22,11 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.UUID; +import java.util.concurrent.TimeUnit; import com.google.common.collect.ImmutableMap; +import org.apache.cassandra.Util; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.schema.SystemDistributedKeyspace; @@ -40,6 +42,7 @@ import org.apache.cassandra.service.AutoRepairService; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; +import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; /** @@ -111,23 +114,11 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { - while(true) - { - if (AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL).getLastRepairTime() == 0 - || AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL).getLastRepairTime() == 0) - { - try - { - Thread.sleep(5000); - } - catch (InterruptedException e) - { - throw new RuntimeException(e); - } - continue; - } - break; - } + Util.spinAssert("AutoRepair has not yet completed one repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); } catch (Exception e) { From 56d6ce09be93c7af336370557818050f3a349984 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:34:54 -0800 Subject: [PATCH 159/257] Incorporate the code review 
comments --- .../distributed/test/repair/AutoRepairSchedulerTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 9f0690123cf7..326029392395 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -114,11 +114,16 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { - Util.spinAssert("AutoRepair has not yet completed one repair cycle", + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", greaterThan(0L), AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, 5, TimeUnit.MINUTES); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); } catch (Exception e) { From 66bfb3a47ca7df5b12b503f619fda561f820424c Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:37:26 -0800 Subject: [PATCH 160/257] Group all public statements together --- .../org/apache/cassandra/repair/autorepair/AutoRepair.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index add579851cf0..07fb4793c51e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -83,14 +83,14 @@ public class AutoRepair // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra 
nodes with tiny amount of data. public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); + @VisibleForTesting + public final Map repairStates; + @VisibleForTesting protected final Map repairExecutors; protected final Map repairRunnableExecutors; - @VisibleForTesting - public final Map repairStates; - @VisibleForTesting // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. From ef5b838586d13f1e8e00768d824221540729dadc Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:41:44 -0800 Subject: [PATCH 161/257] No need for try/catch in the test case --- .../test/repair/AutoRepairSchedulerTest.java | 27 +++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 326029392395..c7b7448d1a24 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -112,23 +112,16 @@ public void testScheduler() throws ParseException // validate that the repair ran on all nodes cluster.forEach(i -> i.runOnInstance(() -> { - try - { - Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", - greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, - 5, - TimeUnit.MINUTES); - Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", - greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, - 5, - TimeUnit.MINUTES); - } - catch (Exception e) - 
{ - throw new RuntimeException(e); - } + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); })); validate(AutoRepairConfig.RepairType.FULL.toString()); validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); From 855bfbef76994e2c46a82f8f5e84eeec3f8040c9 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 7 Mar 2025 14:42:25 -0800 Subject: [PATCH 162/257] Avoid micros splitting of token ranges for FixedSplitTokenRangeSplitter --- .../autorepair/FixedSplitTokenRangeSplitter.java | 4 +++- .../autorepair/FixedSplitTokenRangeSplitterTest.java | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index e27b8188bd2f..f8b28fa6001e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -102,9 +102,11 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primar boolean byKeyspace = config.getRepairByKeyspace(repairType); // collect all token ranges. 
List> allRanges = new ArrayList<>(); + // this is done to avoid micro splits in the case of vnodes + int splitsPerRange = Math.max(1, numberOfSubranges / tokens.size()); for (Range token : tokens) { - allRanges.addAll(split(token, numberOfSubranges)); + allRanges.addAll(split(token, splitsPerRange)); } if (byKeyspace) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 6326f55f9d5c..67deafac71b6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -61,8 +61,11 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; + private static final int numberOfSubRanges = 4; - private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(4)); + private static final int totalTokenRanges = 3; + private static int numberOfSplits; + private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges)); @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -96,16 +99,16 @@ public static void setupClass() throws Exception SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + + numberOfSplits = numberOfSubRanges/totalTokenRanges; } @Test public void testTokenRangesSplitByTable() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); - int totalTokenRanges = 3; Collection> tokens = 
StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); - int numberOfSplits = 4; List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (int i = 0; i < tables.size(); i++) @@ -156,10 +159,8 @@ public void testTokenRangesSplitByTable() public void testTokenRangesSplitByKeyspace() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); - int totalTokenRanges = 3; Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); - int numberOfSplits = 4; List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (Range range : tokens) @@ -195,7 +196,6 @@ public void testTokenRangesSplitByKeyspace() public void testTokenRangesNoSplitByDefault() { Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - int totalTokenRanges = 3; assertEquals(totalTokenRanges, tokens.size()); List> expectedToken = new ArrayList<>(tokens); From bc55345e9ede541b21da2205c25339bcd8b1d21d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 7 Mar 2025 18:07:00 -0800 Subject: [PATCH 163/257] Make the number_of_subranges parameterized in test case --- .../FixedSplitTokenRangeSplitterTest.java | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 67deafac71b6..ebe3fe673561 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -26,8 +26,8 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.Map; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; 
import org.junit.runner.RunWith; @@ -61,19 +61,27 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; - private static final int numberOfSubRanges = 4; - private static final int totalTokenRanges = 3; private static int numberOfSplits; - private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges)); - @Parameterized.Parameter() + @Parameterized.Parameter(0) public AutoRepairConfig.RepairType repairType; - @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + @Parameterized.Parameter(1) + public int numberOfSubRange; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRange={1}") + public static Collection parameters() { - return Arrays.asList(AutoRepairConfig.RepairType.values()); + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + { + for (int subRange : Arrays.asList(4, 16, 64, 128)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; } @BeforeClass @@ -99,8 +107,12 @@ public static void setupClass() throws Exception SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + } - numberOfSplits = numberOfSubRanges/totalTokenRanges; + @Before + public void before() + { + numberOfSplits = numberOfSubRange / totalTokenRanges; } @Test @@ -121,7 +133,7 @@ public void testTokenRangesSplitByTable() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, splitterParams) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, 
Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -130,7 +142,7 @@ public void testTokenRangesSplitByTable() assertFalse(keyspaceAssignments.hasNext()); List assignments = keyspace.getRepairAssignments(); - assertEquals(totalTokenRanges*numberOfSplits*tables.size(), assignments.size()); + assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); int expectedTableIndex = -1; @@ -144,9 +156,9 @@ public void testTokenRangesSplitByTable() expectedTableIndex = -1; // should be a set of ranges for each table. - for (int i = 0; i plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, splitterParams) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -181,11 +193,11 @@ public void testTokenRangesSplitByKeyspace() List assignments = keyspace.getRepairAssignments(); assertNotNull(assignments); - assertEquals(totalTokenRanges*numberOfSplits, assignments.size()); + assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); // should only be one set of ranges for the entire keyspace. 
- for (int i = 0; i Date: Sun, 9 Mar 2025 13:51:09 -0700 Subject: [PATCH 164/257] Review comments from Andy --- .../FixedSplitTokenRangeSplitter.java | 13 +- .../FixedSplitTokenRangeSplitterTest.java | 121 +++++++++--------- 2 files changed, 75 insertions(+), 59 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index f8b28fa6001e..e571b025a271 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -45,6 +45,17 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt { private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); + /** + * Selecting the default value is tricky. If we select a small number then individual repair would be heavy, + * on the other hand, if we select large then too many repair sessions would be created. + *

        + * The default 64 is a good number for non v-nodes, but with v-nodes it can result in too many repair sessions + * because the number of repair sessions on a node = number_of_subranges * num_token. + *

        + * To keep it balances, 32 serves a good default that would cater to both v-nodes and non v-nodes. + */ + public static final int DEFAULT_SUBRANGES = 32; + /** * The number of subranges to split each to-be-repaired token range into. Defaults to 1. *

        @@ -69,7 +80,7 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< { this.repairType = repairType; - numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, "1")); + numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_SUBRANGES))); } @Override diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index ebe3fe673561..c617771c88b1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -22,12 +22,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.TreeSet; -import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; @@ -36,16 +35,18 @@ import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.BootStrapper; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.sai.disk.format.Version; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; +import static 
org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_SUBRANGES; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -61,24 +62,30 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; - private static final int totalTokenRanges = 3; - private static int numberOfSplits; @Parameterized.Parameter(0) public AutoRepairConfig.RepairType repairType; @Parameterized.Parameter(1) - public int numberOfSubRange; + public int numberOfSubRanges; - @Parameterized.Parameters(name = "repairType={0}, numberOfSubRange={1}") + @Parameterized.Parameter(2) + public int numTokens; + + public static final int curTokenRange = 1; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}, numTokens={2}") public static Collection parameters() { List params = new ArrayList<>(); for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) { - for (int subRange : Arrays.asList(4, 16, 64, 128)) + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) { - params.add(new Object[]{ type, subRange }); + for (int numToken : Arrays.asList(curTokenRange)) + { + params.add(new Object[]{ type, subRange, numToken }); + } } } return params; @@ -92,15 +99,8 @@ public static void setupClass() throws Exception DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); ServerTestUtils.prepareServerNoRegister(); - Token t1 = new Murmur3Partitioner.LongToken(-9223372036854775808L); - Token t2 = new Murmur3Partitioner.LongToken(-3074457345618258603L); - Token t3 = new Murmur3Partitioner.LongToken(3074457345618258602L); - Set tokens = new HashSet<>(); - tokens.add(t1); - tokens.add(t2); - tokens.add(t3); - - ServerTestUtils.registerLocal(tokens); + Set tokens1 = 
BootStrapper.getRandomTokens(ClusterMetadata.current(), curTokenRange); + ServerTestUtils.registerLocal(tokens1); // Ensure that the on-disk format statics are loaded before the test run Version.LATEST.onDiskFormat(); StorageService.instance.doAutoRepairSetup(); @@ -109,18 +109,13 @@ public static void setupClass() throws Exception QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); } - @Before - public void before() - { - numberOfSplits = numberOfSubRange / totalTokenRanges; - } - @Test public void testTokenRangesSplitByTable() { + int numberOfSplits = calcSplits(numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(totalTokenRanges, tokens.size()); + assertEquals(numTokens, tokens.size()); List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (int i = 0; i < tables.size(); i++) @@ -133,7 +128,7 @@ public void testTokenRangesSplitByTable() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. 
@@ -142,37 +137,31 @@ public void testTokenRangesSplitByTable() assertFalse(keyspaceAssignments.hasNext()); List assignments = keyspace.getRepairAssignments(); - assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); + assertEquals(numTokens * numberOfSplits * tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - int expectedTableIndex = -1; - for (int i = 0; i < totalTokenRanges * numberOfSplits * tables.size(); i++) - { - if (i % (totalTokenRanges * numberOfSplits) == 0) - { - expectedTableIndex++; - } - } - - expectedTableIndex = -1; - // should be a set of ranges for each table. - for (int i = 0; i < totalTokenRanges * numberOfSplits * tables.size(); i++) + int assignmentsPerTable = numTokens * numberOfSplits; + for (int i = 0; i < tables.size(); i++) { - if (i % (totalTokenRanges * numberOfSplits) == 0) + List assignmentForATable = new ArrayList<>(); + List> expectedTokensForATable = new ArrayList<>(); + for (int j = 0; j < assignmentsPerTable; j++) { - expectedTableIndex++; + assertEquals(Arrays.asList(tables.get(i)), assignments.get(i*assignmentsPerTable + j).getTableNames()); + assignmentForATable.add(assignments.get(i*assignmentsPerTable+j)); + expectedTokensForATable.add(expectedToken.get(i*assignmentsPerTable+j)); } - assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); - assertEquals(Arrays.asList(tables.get(expectedTableIndex)), assignments.get(i).getTableNames()); + compare(numberOfSplits, expectedTokensForATable, assignmentForATable); } } @Test public void testTokenRangesSplitByKeyspace() { + int numberOfSplits = calcSplits(numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(totalTokenRanges, tokens.size()); + assertEquals(numTokens, tokens.size()); List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken 
= new ArrayList<>(); for (Range range : tokens) @@ -182,7 +171,7 @@ public void testTokenRangesSplitByKeyspace() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -193,23 +182,23 @@ public void testTokenRangesSplitByKeyspace() List assignments = keyspace.getRepairAssignments(); assertNotNull(assignments); - assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); + assertEquals(numTokens * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - // should only be one set of ranges for the entire keyspace. 
- for (int i = 0; i < totalTokenRanges * numberOfSplits; i++) - { - assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); - assertEquals(tables, assignments.get(i).getTableNames()); - } + compare(numberOfSplits, expectedToken, assignments); } @Test - public void testTokenRangesNoSplitByDefault() + public void testTokenRangesWithDefaultSplit() { + int numberOfSplits = calcSplits(DEFAULT_SUBRANGES); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(totalTokenRanges, tokens.size()); - List> expectedToken = new ArrayList<>(tokens); + assertEquals(numTokens, tokens.size()); + List> expectedToken = new ArrayList<>(); + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1); @@ -224,10 +213,26 @@ public void testTokenRangesNoSplitByDefault() assertNotNull(assignments); // should be 3 entries for the table which covers each token range. 
- assertEquals(totalTokenRanges, assignments.size()); - for (int i = 0; i < totalTokenRanges; i++) + assertEquals(numTokens * numberOfSplits, assignments.size()); + + compare(numberOfSplits, expectedToken, assignments); + } + + private void compare(int numberOfSplits, List> expectedToken, List assignments) + { + assertEquals(expectedToken.size(), assignments.size()); + Set> a = new TreeSet<>(); + Set> b = new TreeSet<>(); + for (int i = 0; i < numTokens * numberOfSplits; i++) { - assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); + a.add(expectedToken.get(i)); + b.add(assignments.get(i).getTokenRange()); } + assertEquals(a, b); + } + + private int calcSplits(int subRange) + { + return Math.max(1, subRange / numTokens); } } From fcb05eeba990af2dcde95eba0a905deab7bc3ab3 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 14:14:31 -0700 Subject: [PATCH 165/257] Add v-nodes and non v-nodes test cases --- .../FixedSplitTokenRangeSplitter.java | 4 +- ...> FixedSplitTokenRangeSplitterHelper.java} | 96 +++------------- ...edSplitTokenRangeSplitterNoVNodesTest.java | 108 ++++++++++++++++++ ...ixedSplitTokenRangeSplitterVNodesTest.java | 108 ++++++++++++++++++ 4 files changed, 236 insertions(+), 80 deletions(-) rename test/unit/org/apache/cassandra/repair/autorepair/{FixedSplitTokenRangeSplitterTest.java => FixedSplitTokenRangeSplitterHelper.java} (66%) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index e571b025a271..953216832f1f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ 
b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -54,7 +54,7 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt *

        * To keep it balances, 32 serves a good default that would cater to both v-nodes and non v-nodes. */ - public static final int DEFAULT_SUBRANGES = 32; + public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; /** * The number of subranges to split each to-be-repaired token range into. Defaults to 1. @@ -80,7 +80,7 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< { this.repairType = repairType; - numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_SUBRANGES))); + numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_NUMBER_OF_SUBRANGES))); } @Override diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java similarity index 66% rename from test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index c617771c88b1..4b4f41375f8c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -27,92 +27,34 @@ import java.util.Set; import java.util.TreeSet; -import org.junit.BeforeClass; -import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.BootStrapper; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.sai.disk.format.Version; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; -import 
org.apache.cassandra.tcm.ClusterMetadata; -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; -import static org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_SUBRANGES; +import static org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_NUMBER_OF_SUBRANGES; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** - * Unit tests for {@link org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter} + * Helper class for for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} */ @RunWith(Parameterized.class) -public class FixedSplitTokenRangeSplitterTest +public class FixedSplitTokenRangeSplitterHelper { - private static final String KEYSPACE = "ks"; private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; + public static final String KEYSPACE = "ks"; - @Parameterized.Parameter(0) - public AutoRepairConfig.RepairType repairType; - - @Parameterized.Parameter(1) - public int numberOfSubRanges; - - @Parameterized.Parameter(2) - public int numTokens; - - public static final int curTokenRange = 1; - - @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}, numTokens={2}") - public static Collection parameters() - { - List params = new ArrayList<>(); - for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) - { - for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) - { - for (int numToken : Arrays.asList(curTokenRange)) - { - params.add(new Object[]{ type, subRange, numToken }); - } - } - } - return params; - } - - @BeforeClass 
- public static void setupClass() throws Exception - { - setupSeed(); - updateConfigs(); - DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); - ServerTestUtils.prepareServerNoRegister(); - - Set tokens1 = BootStrapper.getRandomTokens(ClusterMetadata.current(), curTokenRange); - ServerTestUtils.registerLocal(tokens1); - // Ensure that the on-disk format statics are loaded before the test run - Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); - - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); - } - - @Test - public void testTokenRangesSplitByTable() + public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { - int numberOfSplits = calcSplits(numberOfSubRanges); + int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); @@ -147,18 +89,17 @@ public void testTokenRangesSplitByTable() List> expectedTokensForATable = new ArrayList<>(); for (int j = 0; j < assignmentsPerTable; j++) { - assertEquals(Arrays.asList(tables.get(i)), assignments.get(i*assignmentsPerTable + j).getTableNames()); - assignmentForATable.add(assignments.get(i*assignmentsPerTable+j)); - expectedTokensForATable.add(expectedToken.get(i*assignmentsPerTable+j)); + assertEquals(Arrays.asList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); + assignmentForATable.add(assignments.get(i * assignmentsPerTable + j)); + expectedTokensForATable.add(expectedToken.get(i * assignmentsPerTable + j)); } - compare(numberOfSplits, expectedTokensForATable, assignmentForATable); + compare(numTokens, numberOfSplits, 
expectedTokensForATable, assignmentForATable); } } - @Test - public void testTokenRangesSplitByKeyspace() + public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { - int numberOfSplits = calcSplits(numberOfSubRanges); + int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); @@ -185,13 +126,12 @@ public void testTokenRangesSplitByKeyspace() assertEquals(numTokens * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - compare(numberOfSplits, expectedToken, assignments); + compare(numTokens, numberOfSplits, expectedToken, assignments); } - @Test - public void testTokenRangesWithDefaultSplit() + public static void testTokenRangesWithDefaultSplit(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { - int numberOfSplits = calcSplits(DEFAULT_SUBRANGES); + int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); List> expectedToken = new ArrayList<>(); @@ -215,10 +155,10 @@ public void testTokenRangesWithDefaultSplit() // should be 3 entries for the table which covers each token range. 
assertEquals(numTokens * numberOfSplits, assignments.size()); - compare(numberOfSplits, expectedToken, assignments); + compare(numTokens, numberOfSplits, expectedToken, assignments); } - private void compare(int numberOfSplits, List> expectedToken, List assignments) + private static void compare(int numTokens, int numberOfSplits, List> expectedToken, List assignments) { assertEquals(expectedToken.size(), assignments.size()); Set> a = new TreeSet<>(); @@ -231,7 +171,7 @@ private void compare(int numberOfSplits, List> expectedToken, List< assertEquals(a, b); } - private int calcSplits(int subRange) + private static int calcSplits(int numTokens, int subRange) { return Math.max(1, subRange / numTokens); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java new file mode 100644 index 000000000000..a92c7d2bc69b --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; + +/** + * Unit tests for a setup that does not have v-nodes {@link FixedSplitTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class FixedSplitTokenRangeSplitterNoVNodesTest +{ + private static final int numTokens = 1; + + @Parameterized.Parameter(0) + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public int numberOfSubRanges; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}") + public static Collection parameters() + { + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + { + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; + } + + @BeforeClass + public static void setupClass() throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = 
BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + + @Test + public void testTokenRangesSplitByTable() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByTable(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByKeyspace(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesWithDefaultSplit() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java new file mode 100644 index 000000000000..1b48537968d4 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; + +/** + * Unit tests for a setup that has v-nodes {@link FixedSplitTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class FixedSplitTokenRangeSplitterVNodesTest +{ + private static final int numTokens = 16; + + @Parameterized.Parameter(0) + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public int numberOfSubRanges; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}") + public static Collection parameters() + { + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + 
{ + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; + } + + @BeforeClass + public static void setupClass() throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + + @Test + public void testTokenRangesSplitByTable() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByTable(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByKeyspace(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesWithDefaultSplit() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + } +} From 418a87a6682a1b3fbcfc74b2af4cbd46f3fcc595 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 14:36:52 -0700 Subject: [PATCH 166/257] Adjust the comment --- .../autorepair/FixedSplitTokenRangeSplitter.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 953216832f1f..720e2812a5b0 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -46,13 +46,15 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); /** - * Selecting the default value is tricky. If we select a small number then individual repair would be heavy, - * on the other hand, if we select large then too many repair sessions would be created. + * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. + * On the other hand, if we select a large number, too many repair sessions would be created. *

        - * The default 64 is a good number for non v-nodes, but with v-nodes it can result in too many repair sessions - * because the number of repair sessions on a node = number_of_subranges * num_token. + * The default value of 64 works well for non-vnodes, but with vnodes, it can result in too many repair sessions + * because the total number of repair sessions on a node is calculated as: *

        - * To keep it balances, 32 serves a good default that would cater to both v-nodes and non v-nodes. + * number_of_subranges * num_token + *

        + * To maintain balance, 32 serves as a good default that accommodates both vnodes and non-vnodes effectively. */ public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; From cc00a4efec7b950dc7f1d0688221dc220966df25 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 20:16:41 -0700 Subject: [PATCH 167/257] Add the missing nodetool way to update repair_by_keyspace --- .../org/apache/cassandra/service/AutoRepairService.java | 6 ++++++ .../apache/cassandra/service/AutoRepairServiceMBean.java | 3 +++ src/java/org/apache/cassandra/tools/NodeProbe.java | 5 +++++ .../cassandra/tools/nodetool/SetAutoRepairConfig.java | 6 +++++- 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 1c96ab685d2c..0cf744296e07 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -255,6 +255,12 @@ public void setAutoRepairTokenRangeSplitterParameter(String repairType, String k config.getTokenRangeSplitterInstance(RepairType.parse(repairType)).setParameter(key, value); } + @Override + public void setRepairByKeyspace(String repairType, boolean repairByKeyspace) + { + config.setRepairByKeyspace(RepairType.parse(repairType), repairByKeyspace); + } + private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig config) { StringBuilder sb = new StringBuilder(); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index f3ec919c8f1b..d57329e86795 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -68,4 +68,7 @@ public interface AutoRepairServiceMBean public Set getOnGoingRepairHostIds(String repairType); public void 
setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value); + + public void setRepairByKeyspace(String repairType, boolean repairByKeyspace); + } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index cab5153a70d3..723894caa093 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2679,6 +2679,11 @@ public Set getAutoRepairOnGoingRepairHostIds(String repairType) { return autoRepairProxy.getOnGoingRepairHostIds(repairType); } + + public void setAutoRepairRepairByKeyspace(String repairType, boolean enabled) + { + autoRepairProxy.setRepairByKeyspace(repairType, enabled); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 9f22a43c4eb9..37f809943fb6 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -48,7 +48,8 @@ public class SetAutoRepairConfig extends NodeToolCmd "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + "|parallel_repair_count|parallel_repair_percentage|materialized_view_repair_enabled|repair_max_retries" + - "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration|token_range_splitter.]", + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration" + + "|repair_by_keyspace|token_range_splitter.]", required = true) protected List args = new ArrayList<>(); @@ -159,6 +160,9 @@ public void execute(NodeProbe probe) case "repair_session_timeout": probe.setAutoRepairSessionTimeout(repairTypeStr, paramVal); break; + case "repair_by_keyspace": + 
probe.setAutoRepairRepairByKeyspace(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); } From bf43c973963c7154cb5617ca862bad23bcb9fe46 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 20:31:05 -0700 Subject: [PATCH 168/257] Use common setup code for FixedSplitTokenRangeSplitterTest*.java --- .../FixedSplitTokenRangeSplitterHelper.java | 26 +++++++++++++++++ ...edSplitTokenRangeSplitterNoVNodesTest.java | 28 +------------------ ...ixedSplitTokenRangeSplitterVNodesTest.java | 28 +------------------ 3 files changed, 28 insertions(+), 54 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index 4b4f41375f8c..5c04351325aa 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -30,11 +30,20 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; import static 
org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_NUMBER_OF_SUBRANGES; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -52,6 +61,23 @@ public class FixedSplitTokenRangeSplitterHelper private static final String TABLE3 = "tbl3"; public static final String KEYSPACE = "ks"; + public static void setupClass(int numTokens) throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java index a92c7d2bc69b..5b86728e574d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -22,26 +22,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import 
org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.BootStrapper; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadata; - -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; - /** * Unit tests for a setup that does not have v-nodes {@link FixedSplitTokenRangeSplitter} */ @@ -73,19 +59,7 @@ public static Collection parameters() @BeforeClass public static void setupClass() throws Exception { - setupSeed(); - updateConfigs(); - DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); - ServerTestUtils.prepareServerNoRegister(); - - Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); - ServerTestUtils.registerLocal(tokens); - // Ensure that the on-disk format statics are loaded before the test run - Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); - - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + FixedSplitTokenRangeSplitterHelper.setupClass(numTokens); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java index 1b48537968d4..8ca30bd2017c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -22,26 +22,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.BootStrapper; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadata; - -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; - /** * Unit tests for a setup that has v-nodes {@link FixedSplitTokenRangeSplitter} */ @@ -73,19 +59,7 @@ public static Collection parameters() @BeforeClass public static void setupClass() throws Exception { - setupSeed(); - updateConfigs(); - DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); - ServerTestUtils.prepareServerNoRegister(); - - Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); - ServerTestUtils.registerLocal(tokens); - // Ensure that the on-disk format statics are loaded before the test run - Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); - - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + FixedSplitTokenRangeSplitterHelper.setupClass(numTokens); } @Test From b373f266145de558e590089b151a052aeb685213 Mon Sep 
17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 20:39:19 -0700 Subject: [PATCH 169/257] Fix FixedSplitTokenRangeSplitterHelper.java failure --- .../repair/autorepair/FixedSplitTokenRangeSplitterHelper.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index 5c04351325aa..f041873b79a7 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -27,9 +27,6 @@ import java.util.Set; import java.util.TreeSet; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; @@ -53,7 +50,6 @@ /** * Helper class for for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} */ -@RunWith(Parameterized.class) public class FixedSplitTokenRangeSplitterHelper { private static final String TABLE1 = "tbl1"; From 0f8c89050e0a400d50c4acfc6cf76b434d0f84df Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 10 Mar 2025 16:22:11 -0700 Subject: [PATCH 170/257] Address review comments: update comment/remove dead code --- .../repair/autorepair/FixedSplitTokenRangeSplitter.java | 6 +++--- .../autorepair/FixedSplitTokenRangeSplitterHelper.java | 7 +++---- .../FixedSplitTokenRangeSplitterNoVNodesTest.java | 2 +- .../autorepair/FixedSplitTokenRangeSplitterVNodesTest.java | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 720e2812a5b0..2879719fdb95 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -49,10 +49,10 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. * On the other hand, if we select a large number, too many repair sessions would be created. *

        - * The default value of 64 works well for non-vnodes, but with vnodes, it can result in too many repair sessions - * because the total number of repair sessions on a node is calculated as: + * The default value of 64 works well for non-vnodes, but with vnodes, it might result in too many repair sessions + * and to cap the number for vnodes, we use the following formula: *

        - * number_of_subranges * num_token + * Math.max(1, numberOfSubranges / tokens.size()) *

        * To maintain balance, 32 serves as a good default that accommodates both vnodes and non-vnodes effectively. */ diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index f041873b79a7..cfa0748f1f7f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -48,7 +48,7 @@ import static org.junit.Assert.assertTrue; /** - * Helper class for for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} + * Helper class for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} */ public class FixedSplitTokenRangeSplitterHelper { @@ -111,7 +111,7 @@ public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRan List> expectedTokensForATable = new ArrayList<>(); for (int j = 0; j < assignmentsPerTable; j++) { - assertEquals(Arrays.asList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); + assertEquals(Collections.singletonList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); assignmentForATable.add(assignments.get(i * assignmentsPerTable + j)); expectedTokensForATable.add(expectedToken.get(i * assignmentsPerTable + j)); } @@ -125,7 +125,6 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSub AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); - List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (Range range : tokens) { @@ -151,7 +150,7 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, 
int numberOfSub compare(numTokens, numberOfSplits, expectedToken, assignments); } - public static void testTokenRangesWithDefaultSplit(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) + public static void testTokenRangesWithDefaultSplit(int numTokens, AutoRepairConfig.RepairType repairType) { int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java index 5b86728e574d..a30f3aa76246 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -77,6 +77,6 @@ public void testTokenRangesSplitByKeyspace() @Test public void testTokenRangesWithDefaultSplit() { - FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, repairType); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java index 8ca30bd2017c..6839748d1f01 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -77,6 +77,6 @@ public void testTokenRangesSplitByKeyspace() @Test public void testTokenRangesWithDefaultSplit() { - FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, 
repairType); } } From 952bb2f36e7a944b2cae0576b375be521df06a82 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 11 Mar 2025 09:43:55 -0700 Subject: [PATCH 171/257] Adjust comment for DEFAULT_NUMBER_OF_SUBRANGES and NUMBER_OF_SUBRANGES --- .../FixedSplitTokenRangeSplitter.java | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 2879719fdb95..a6dddb3060bb 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -49,8 +49,8 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. * On the other hand, if we select a large number, too many repair sessions would be created. *

        - * The default value of 64 works well for non-vnodes, but with vnodes, it might result in too many repair sessions - * and to cap the number for vnodes, we use the following formula: + * If vnodes are configured using num_tokens, attempts to evenly subdivide subranges by each range + * using the following formula: *

        * Math.max(1, numberOfSubranges / tokens.size()) *

        @@ -59,19 +59,12 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; /** - * The number of subranges to split each to-be-repaired token range into. Defaults to 1. + * Number of evenly split subranges to create for each node that repair runs for. *

        - * The higher this number, the smaller the repair sessions will be. - *

        - * If you are using vnodes, say 256, then the repair will always go one vnode range at a time. This parameter, - * additionally, will let us further subdivide a given vnode range into subranges. - *

        - * With the value "1" and vnodes of 256, a given table on a node will undergo the repair 256 times. But with a - * value "2", the same table on a node will undergo a repair 512 times because every vnode range will be further - * divided by two. - *

        - * If you do not use vnodes or the number of vnodes is pretty small, say 8, setting this value to a higher number, - * such as 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. + * If vnodes are configured using num_tokens, attempts to evenly subdivide subranges by each range. + * For example, for num_tokens: 16 and number_of_subranges: 32, 2 (32/16) + * repair assignments will be created for each token range. At least one repair assignment will be + * created for each token range. */ static final String NUMBER_OF_SUBRANGES = "number_of_subranges"; From bb944f0e523aea62d5372f80650cf184f69114a1 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Fri, 7 Mar 2025 23:06:35 -0600 Subject: [PATCH 172/257] Auto Repair Documentation improvements, bytes_per_assignment to 50GiB Updates auto repair documentation and moves it under managing/operating/auto_repair.adoc. Updates existing repair documentation to reference the auto repair documentation. Updates bytes_per_assignment default to 50GiB (from 200GiB) for all repair types to be more conservative. Clean up some syntactical issues and formatting in cassandra.yaml files. 
patch by Andy Tolbert; reviewed by ____ for CASSANDRA-20421 --- conf/cassandra.yaml | 69 ++- conf/cassandra_latest.yaml | 68 ++- doc/modules/cassandra/nav.adoc | 3 +- .../cassandra/pages/auto-repair/overview.adoc | 211 --------- .../pages/managing/operating/auto_repair.adoc | 410 ++++++++++++++++++ .../operating/compaction/tombstones.adoc | 3 +- .../pages/managing/operating/index.adoc | 3 +- .../pages/managing/operating/repair.adoc | 16 +- .../repair/autorepair/AutoRepairConfig.java | 9 +- .../autorepair/RepairTokenRangeSplitter.java | 147 ++----- .../autorepair/AutoRepairConfigTest.java | 13 +- 11 files changed, 567 insertions(+), 385 deletions(-) delete mode 100644 doc/modules/cassandra/pages/auto-repair/overview.adoc create mode 100644 doc/modules/cassandra/pages/managing/operating/auto_repair.adoc diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 39e0ab776964..57f053c7c07a 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2679,9 +2679,15 @@ storage_compatibility_mode: NONE # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. # if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 -# incremental_repair_disk_headroom_reject_ratio = 0.2; +# incremental_repair_disk_headroom_reject_ratio: 0.2 -# Configuration for AutoRepair Scheduler. +# Configuration for Auto Repair Scheduler. +# +# This feature is disabled by default. +# +# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this +# feature. +# # auto_repair: # # Enable/Disable the auto-repair scheduler. # # If set to false, the scheduler thread will not be started. @@ -2693,17 +2699,33 @@ storage_compatibility_mode: NONE # full: # # Enable/Disable full auto-repair # enabled: true -# # Minimum duration between repairing the same node again. 
This is useful for tiny clusters, such as -# # clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler -# # completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node -# # until 24 hours have passed since the last repair. +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, +# # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one +# # round on all nodes in less than this duration, it will not start a new repair round on a given node until +# # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce +# # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than +# # gc_grace_seconds to avoid zombies. # min_repair_interval: 24h # token_range_splitter: +# # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges +# # for repair assignments. +# # +# # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter, +# # FixedSplitTokenRangeSplitter}. +# # +# # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition +# # count. +# # - FixedSplitTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option. +# # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# +# # Optional parameters can be specified in the form of: +# # parameters: +# # param_key1: param_value1 # parameters: # # The target and maximum amount of bytes that should be included in a repair # # assignment. This scopes the amount of work involved in a repair and includes # # the data covering the range being repaired. -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. 
This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this # # value if the impact of repairs is causing too much load on the cluster or increase it @@ -2712,29 +2734,32 @@ storage_compatibility_mode: NONE # # This is set to a large value for full repair to attempt to repair all data per repair schedule. # max_bytes_per_schedule: 100000GiB # incremental: -# enabled: true -# # Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps -# # the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent -# # schedule such as 1h is recommended. -# # When turning on incremental repair for the first time with a decent amount of data it may be advisable to -# # increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. -# min_repair_interval: 1h +# enabled: false +# # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair +# # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +# # so a more frequent schedule such as 1h is recommended. +# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is advisable to +# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental +# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired +# # set small. +# min_repair_interval: 24h # token_range_splitter: # parameters: # # Configured to attempt repairing 50GiB of data per repair. -# # This throttles the amount of incremental repair and anticompaction done per schedule -# # after incremental repairs are turned on. +# # This throttles the amount of incremental repair and anticompaction done per schedule after incremental +# # repairs are turned on. 
# bytes_per_assignment: 50GiB -# # The maximum number of bytes to cover in an individual schedule to 100GiB. -# # Consider increasing max_bytes_per_schedule if -# # more data is written than this limit within the min_repair_interval. +# # Restricts the maximum number of bytes to cover in an individual schedule to 100GiB. +# # Consider increasing this if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: -# enabled: true +# # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired +# # data set. +# enabled: false # min_repair_interval: 24h # token_range_splitter: # parameters: -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # max_bytes_per_schedule: 100000GiB # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. @@ -2760,7 +2785,7 @@ storage_compatibility_mode: NONE # # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. # parallel_repair_count: 3 # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value -# # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. +# # is used. # parallel_repair_percentage: 3 # # Repairs materialized views if true. # materialized_view_repair_enabled: false diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index da44d4f7b999..e3d0c2e89d23 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2374,9 +2374,15 @@ storage_compatibility_mode: NONE # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. 
# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 -# incremental_repair_disk_headroom_reject_ratio = 0.2; +# incremental_repair_disk_headroom_reject_ratio: 0.2; -# Configuration for AutoRepair Scheduler. +# Configuration for Auto Repair Scheduler. +# +# This feature is disabled by default. +# +# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this +# feature. +# # auto_repair: # # Enable/Disable the auto-repair scheduler. # # If set to false, the scheduler thread will not be started. @@ -2388,17 +2394,33 @@ storage_compatibility_mode: NONE # full: # # Enable/Disable full auto-repair # enabled: true -# # Minimum duration between repairing the same node again. This is useful for tiny clusters, such as -# # clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler -# # completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node -# # until 24 hours have passed since the last repair. +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, +# # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one +# # round on all nodes in less than this duration, it will not start a new repair round on a given node until +# # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce +# # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than +# # gc_grace_seconds to avoid zombies. # min_repair_interval: 24h # token_range_splitter: +# # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges +# # for repair assignments. 
+# # +# # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter, +# # FixedTokenRangeSplitter}. +# # +# # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition +# # count. +# # - FixedTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option. +# # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# +# # Optional parameters can be specified in the form of: +# # parameters: +# # param_key1: param_value1 # parameters: # # The target and maximum amount of bytes that should be included in a repair # # assignment. This scopes the amount of work involved in a repair and includes # # the data covering the range being repaired. -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this # # value if the impact of repairs is causing too much load on the cluster or increase it @@ -2407,29 +2429,33 @@ storage_compatibility_mode: NONE # # This is set to a large value for full repair to attempt to repair all data per repair schedule. # max_bytes_per_schedule: 100000GiB # incremental: +# # Enable incremental repair by default for new clusters. # enabled: true -# # Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps -# # the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent -# # schedule such as 1h is recommended. -# # When turning on incremental repair for the first time with a decent amount of data it may be advisable to -# # increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. -# min_repair_interval: 1h +# # Incremental repairs operate over unrepaired data and should finish quickly. 
Running incremental repair +# # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +# # so a more frequent schedule such as 1h is recommended. +# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is advisable to +# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental +# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired +# # set small. +# min_repair_interval: 24h # token_range_splitter: # parameters: # # Configured to attempt repairing 50GiB of data per repair. -# # This throttles the amount of incremental repair and anticompaction done per schedule -# # after incremental repairs are turned on. +# # This throttles the amount of incremental repair and anticompaction done per schedule after incremental +# # repairs are turned on. # bytes_per_assignment: 50GiB -# # The maximum number of bytes to cover in an individual schedule to 100GiB. -# # Consider increasing max_bytes_per_schedule if -# # more data is written than this limit within the min_repair_interval. +# # Restricts the maximum number of bytes to cover in an individual schedule to 100GiB. +# # Consider increasing this if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: -# enabled: true +# # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired +# # data set. +# enabled: false # min_repair_interval: 24h # token_range_splitter: # parameters: -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # max_bytes_per_schedule: 100000GiB # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. @@ -2455,7 +2481,7 @@ storage_compatibility_mode: NONE # # Number of nodes running repair in parallel. 
If parallel_repair_percentage is set, the larger value is used. # parallel_repair_count: 3 # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value -# # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. +# # is used. # parallel_repair_percentage: 3 # # Repairs materialized views if true. # materialized_view_repair_enabled: false diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc index dd9dd1054df9..311bfb16c585 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -99,6 +99,7 @@ ***** xref:cassandra:managing/operating/fqllogging.adoc[Full query logging] **** xref:cassandra:managing/operating/metrics.adoc[Monitoring metrics] **** xref:cassandra:managing/operating/repair.adoc[Repair] +**** xref:cassandra:managing/operating/auto_repair.adoc[Auto Repair] **** xref:cassandra:managing/operating/read_repair.adoc[Read repair] **** xref:cassandra:managing/operating/security.adoc[Security] **** xref:cassandra:managing/operating/snitch.adoc[Snitches] @@ -126,4 +127,4 @@ *** xref:reference/static.adoc[Static columns] *** xref:reference/vector-data-type.adoc[Vector data type] -** xref:integrating/plugins/index.adoc[] \ No newline at end of file +** xref:integrating/plugins/index.adoc[] diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc deleted file mode 100644 index 698e53c44677..000000000000 --- a/doc/modules/cassandra/pages/auto-repair/overview.adoc +++ /dev/null @@ -1,211 +0,0 @@ -= Overview of Auto Repair -:navtitle: Auto Repair overview -:description: Auto Repair concepts - How it works, how to configure it, and more. -:keywords: CEP-37 - -Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. 
This -significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and -manage repairs. - -At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler in Cassandra -maintains a new replicated table, system_distributed.auto_repair_history, which stores the repair history for all nodes, -including details such as the last repair time. The scheduler selects the node(s) to begin repairs and orchestrates the -process to ensure that every table and its token ranges are repaired. The algorithm can run repairs simultaneously on -multiple nodes and splits token ranges into subranges, with necessary retries to handle transient failures. Automatic -repair starts as soon as a Cassandra cluster is launched, similar to compaction, and does not require human -intervention. - -The scheduler currently supports Full, Incremental, and Preview repair types with the following features. New repair -types, such as Paxos repair or other future repair mechanisms, can be integrated with minimal development effort! - - -=== Features -- Capability to run repairs on multiple nodes simultaneously. -- A default implementation and an interface to override the dataset being repaired per session. -- Extendable token split algorithms with two implementations readily available: -. Splits token ranges by placing a cap on the size of data repaired in one session and a maximum cap at the schedule -level (default). -. Splits tokens evenly based on the specified number of splits. -- A new CQL table property offering: -. The ability to disable specific repair types at the table level, allowing the scheduler to skip one or more tables. -. Configuring repair priorities for certain tables to prioritize them over others. -- Dynamic enablement or disablement of the scheduler for each repair type. -- Configurable settings tailored to each repair job. 
-- Rich configuration options for each repair type (e.g., Full, Incremental, or Preview repairs). -- Comprehensive observability features that allow operators to configure alarms as needed. - -=== Yaml Configuration - -==== Top level settings -The following settings are defined at the top level of the configuration file and apply universally across all -repair types. - -[cols=",,",options="header",] -|=== -| Name | Default | Description -| enabled | false | Enable/Disable the auto-repair scheduler. If set to false, the scheduler thread will not be started. -If set to true, the repair scheduler thread will be created. The thread will check for secondary configuration available -for each repair type (full, incremental, and preview_repaired), and based on that, it will schedule repairs. -| repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it -is time to schedule repairs. -| repair_max_retries | 3 | Maximum number of retries for a repair session. -| repair_retry_backoff | 30s | Backoff time before retrying a repair session. -| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the -scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. -| history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. -Deleted hosts are tracked in metadata. for a specified duration to ensure they are indeed removed before adjustments -are made to the schedule. -|=== - - -==== Repair level settings -The following settings can be tailored individually for each repair type. -[cols=",,",options="header",] -|=== -| Name | Default | Description -| enabled | false | Enable/Disable full/incremental/preview_repaired auto-repair -| repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, -each table is repaired individually. 
-| number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler. Similar to -the -j option in nodetool repair. -| parallel_repair_count | 3 | Number of nodes running repair in parallel. If parallel_repair_percentage is set, the -larger value is used. -| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If -parallel_repair_count is set, the larger value is used. Recommendation is that the repair cycle on the cluster should -finish within gc_grace_seconds. -| materialized_view_repair_enabled | false | Repairs materialized views if true. -| initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting -immediately after a restart. -| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. -| force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. -| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 -SSTables to avoid penalizing good tables. -| table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair -proceeds to the next table. -| ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify -data centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded data -centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to not run repair -schedule in certain data centers. -| repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option -in nodetool repair. Defaults to true. General advice is to keep this true. -| token_range_splitter | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Splitter implementation to -generate repair assignments. 
Defaults to RepairTokenRangeSplitter. -| token_range_splitter.partitions_per_assignment | 1048576 | Maximum number of partitions to include in a repair -assignment. Used to reduce number of partitions present in merkle tree leaf nodes to avoid overstreaming. -| token_range_splitter.max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment. -This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables -together if they exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. -|=== - -==== Full & Preview_Repaired repair level settings -The following settings can be tailored individually for each repair type. -[cols=",,",options="header",] -|=== -| min_repair_interval | 24h | Minimum duration between repairing the same node again. This is useful for tiny clusters, -such as clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler -completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node until 24 -hours have passed since the last repair. -| token_range_splitter.bytes_per_assignment | 200GiB | The target and maximum amount of bytes that should be included -in a repair assignment. This scopes the amount of work involved in a repair and includes the data covering the range -being repaired. -| token_range_splitter.max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual -schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the -impact of repairs is causing too much load on the cluster or increase it if writes outpace the amount of data being -repaired. Alternatively, adjust the min_repair_interval. This is set to a large value for full repair to attempt to -repair all data per repair schedule. 
-|=== - -==== Incremental repair level settings -The following settings can be customized for each repair type. -[cols=",,",options="header",] -|=== -| min_repair_interval | 1h | Incremental repairs operate over unrepaired data and should finish quickly. Running them -more frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, -so a more frequent schedule such as 1h is recommended. When turning on incremental repair for the first time with a -decent amount of data it may be advisable to increase this interval to 24h or longer to reduce the impact of -anticompaction caused by incremental repair. -| token_range_splitter.bytes_per_assignment | 50GiB | Configured to attempt repairing 50GiB of data per repair. -This throttles the amount of incremental repair and anticompaction done per schedule after incremental repairs are -turned on. -| token_range_splitter.max_bytes_per_schedule | 100GiB | The maximum number of bytes to cover in an individual schedule -to 100GiB. Consider increasing max_bytes_per_schedule if more data is written than this limit within -the min_repair_interval. 
-|=== - - -=== Nodetool Configuration -==== nodetool getautorepairconfig -``` -$> nodetool getautorepairconfig -repair scheduler configuration: - repair_check_interval: 5m - repair_max_retries: 3 - repair_retry_backoff: 30s - repair_task_min_duration: 5s - history_clear_delete_hosts_buffer_interval: 2h -configuration for repair_type: full - enabled: true - min_repair_interval: 24h - repair_by_keyspace: true - number_of_repair_threads: 1 - sstable_upper_threshold: 10000 - table_max_repair_time: 6h - ignore_dcs: [] - repair_primary_token_range_only: true - parallel_repair_count: 3 - parallel_repair_percentage: 3 - materialized_view_repair_enabled: false - initial_scheduler_delay: 5m - repair_session_timeout: 3h - force_repair_new_node: false - token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter - token_range_splitter.bytes_per_assignment: 200GiB - token_range_splitter.partitions_per_assignment: 1048576 - token_range_splitter.max_tables_per_assignment: 64 - token_range_splitter.max_bytes_per_schedule: 100000GiB -configuration for repair_type: incremental - enabled: true - min_repair_interval: 1h - repair_by_keyspace: true - number_of_repair_threads: 1 - sstable_upper_threshold: 10000 - table_max_repair_time: 6h - ignore_dcs: [] - repair_primary_token_range_only: true - parallel_repair_count: 3 - parallel_repair_percentage: 3 - materialized_view_repair_enabled: false - initial_scheduler_delay: 5m - repair_session_timeout: 3h - force_repair_new_node: false - token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter - token_range_splitter.bytes_per_assignment: 50GiB - token_range_splitter.partitions_per_assignment: 1048576 - token_range_splitter.max_tables_per_assignment: 64 - token_range_splitter.max_bytes_per_schedule: 100GiB -configuration for repair_type: preview_repaired - enabled: false -``` - -==== nodetool autorepairstatus -``` -$> nodetool autorepairstatus -t incremental -Active Repairs 
-425cea55-09aa-46e0-8911-9f37a4424574 - - -$> nodetool autorepairstatus -t full -Active Repairs -NONE - -``` - -==== nodetool setautorepairconfig -``` -$> nodetool setautorepairconfig -t incremental number_of_repair_threads 2 -``` - - - -==== More details -https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution[CEP-37] \ No newline at end of file diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc new file mode 100644 index 000000000000..db50a8c4524b --- /dev/null +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -0,0 +1,410 @@ += Auto Repair +:navtitle: Auto Repair +:description: Auto Repair concepts - How it works, how to configure it, and more. +:keywords: CEP-37, Repair, Incremental, Preview + +Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This +significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and +manage repairs. + +At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler in Cassandra +maintains a new replicated table, `system_distributed.auto_repair_history`, which stores the repair history for all +nodes, including details such as the last repair time. The scheduler selects the node(s) to begin repairs and +orchestrates the process to ensure that every table and its token ranges are repaired. + +The algorithm can run repairs simultaneously on multiple nodes and splits token ranges into subranges, with necessary +retries to handle transient failures. Automatic repair starts as soon as a Cassandra cluster is launched, similar to +compaction, and if configured appropriately, does not require human intervention. + +The scheduler currently supports Full, Incremental, and Preview repair types with the following features. 
New repair +types, such as Paxos repair or other future repair mechanisms, can be integrated with minimal development effort! + + +== Features +- Capability to run repairs on multiple nodes simultaneously. +- A default implementation and an interface to override the dataset being repaired per session. +- Extendable token split algorithms with two implementations readily available: +. Splits token ranges by placing a cap on the size of data repaired in one session and a maximum cap at the schedule +level using xref:#repair-token-range-splitter[RepairTokenRangeSplitter] (default). +. Splits tokens evenly based on the specified number of splits using +xref:#fixed-split-token-range-splitter[FixedSplitTokenRangeSplitter]. +- A new xref:#table-configuration[CQL table property] (`auto_repair`) offering: +. The ability to disable specific repair types at the table level, allowing the scheduler to skip one or more tables. +. Configuring repair priorities for certain tables to prioritize them over others. +- Dynamic enablement or disablement of the scheduler for each repair type. +- Configurable settings tailored to each repair job. +- Rich configuration options for each repair type (e.g., Full, Incremental, or Preview repairs). +- Comprehensive observability features that allow operators to configure alarms as needed. + +== Considerations + +Before enabling auto repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base +understanding of repairs. + +=== Full Repair + +Full Repairs operate over all data in the token range being repaired. It is therefore important to run full repair +with a longer schedule and with smaller assignments. + +=== Incremental Repair + +When enabled from the inception of a cluster, incremental repairs operate over unrepaired data and should finish +quickly when run more frequently. 
+ +Once incremental repair has been run, SSTables will be separated between data that have been incrementally repaired +and data that have not. Therefore, it is important to continually run incremental repair once it has been enabled so +newly written data can be compacted together with previously repaired data, allowing overwritten and expired data to +be eventually purged. + +Running incremental repair more frequently keeps the unrepaired set smaller and thus causes repairs to operate over +a smaller set of data, so a shorter `min_repair_interval` such as `1h` is recommended for new clusters. + +NOTE: When turning on incremental repair for the first time on an existing cluster with a large amount of data it is +advisable to use a larger interval such as `24h` or longer to reduce the impact of anticompaction caused by incremental +repair. Once the entire token range has been repaired, one may consider reducing this to a shorter interval. It is +strongly recommended to not enable incremental repair for an existing cluster unless full repair has previously +actively run. + +=== Previewing Repaired Data + +The `preview_repaired` repair type executes repairs over the repaired data set to detect possible data inconsistencies. + +Inconsistencies in the repaired data set should not happen in practice and could indicate a possible bug in incremental +repair. + +Running preview repairs is useful when considering using the +xref:cassandra:managing/operating/compaction/tombstones.adoc#deletion[only_purge_repaired_tombstones] table compaction +option to prevent data from possibly being resurrected when inconsistent replicas are missing tombstones from deletes. + +When enabled, the `BytesPreviewedDesynchronized` and `TokenRangesPreviewedDesynchronized` +xref:cassandra:managing/operating/metrics.adoc#table-metrics[table metrics] can be used to detect inconsistencies in the +repaired data set. 
+ +== Configuring auto repair in cassandra.yaml + +Configuration for auto repair is managed in the `cassandra.yaml` file by the `auto_repair` property. + +A rich set of configuration exists for configuring auto repair with sensible defaults. However, the expectation +is that some tuning might be needed particularly when it comes to tuning how often repair should run +(`min_repair_interval`) and how repair assignments are created (`token_range_splitter`). + +The following is a practical example of an auto_repair configuration that one might use. + +[source, yaml] +---- +auto_repair: + enabled: true + repair_type_overrides: + full: + enabled: true + min_repair_interval: 5d + incremental: + enabled: true + min_repair_interval: 1h + token_range_splitter: + parameters: + bytes_per_assignment: 50GiB + max_bytes_per_schedule: 100GiB + preview_repaired: + enabled: true + min_repair_interval: 1d + global_settings: + repair_by_keyspace: true + parallel_repair_count: 1 +---- + + +=== Top level settings +The following settings are defined at the top level of the configuration file and apply universally across all +repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Enable/Disable the auto-repair scheduler. If set to false, the scheduler thread will not be started. +If set to true, the repair scheduler thread will be created. The thread will check for secondary configuration available +for each repair type (full, incremental, and preview_repaired), and based on that, it will schedule repairs. +| repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it +is time to schedule repairs. +| repair_max_retries | 3 | Maximum number of retries for a repair session. +| repair_retry_backoff | 30s | Backoff time before retrying a repair session. +| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. 
This prevents the +scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. +| history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. +Deleted hosts are tracked in metadata for a specified duration to ensure they are indeed removed before adjustments +are made to the schedule. +|=== + + +=== Repair level settings +The following settings can be configured globally using `global_settings` or tailored individually for each repair +type by using `repair_type_overrides`. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Whether the given repair types should be enabled +| min_repair_interval | 24h | Minimum duration between repairing the same node again. This is useful for tiny clusters, +such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one round on all +nodes in less than this duration, it will not start a new repair round on a given node until this much time has +passed since the last repair completed. Consider increasing to a larger value to reduce the impact of repairs, +however *note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to +avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[zombies]*. +| class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of +IAutoRepairTokenRangeSplitter to use; responsible for splitting token ranges for repair assignments. Out of the box, +Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter,FixedTokenRangeSplitter}. +| repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, +each table is repaired individually. +| number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler. 
Similar to +the -j option in nodetool repair. +| parallel_repair_count | 3 | Number of nodes running repair in parallel. If `parallel_repair_percentage` is set, the +larger value is used. +| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If +`parallel_repair_count` is set, the larger value is used. +| materialized_view_repair_enabled | false | Repairs materialized views if true. +| initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting +immediately after a restart. +| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. +| force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. +| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. +| table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair +proceeds to the next table. +| ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify +data centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded data +centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to not run repair +schedule in certain data centers. +| repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option +in nodetool repair. General advice is to keep this true. +|=== + +=== `RepairTokenRangeSplitter` configuration +[#repair-token-range-splitter] + +`RepairTokenRangeSplitter` is the default implementation of `IAutoRepairTokenRangeSplitter` that attempts to create +token range assignments meeting the following goals: + +1. *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic. 
If a +node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact of +disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration becoming +the main bottleneck. + +2. *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this +might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide partitions— +can lead to issues with disk usage and higher compaction costs. + +3. *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. +If a repair covers too many partitions, the tree’s leaves represent larger data ranges. Even a small change in a leaf +can trigger excessive data streaming, making the process inefficient. + +4. *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a +single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when dealing with +hundreds of tables. Running individual repairs for each table can significantly impact performance and efficiency. + +To achieve these goals, this implementation inspects SSTable metadata to estimate the bytes and number of partitions +within a range and splits it accordingly to bound the size of the token ranges used for repair assignments. + +==== Parameter defaults + +The following `parameters` include the same defaults for all repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| partitions_per_assignment | 1048576 | Maximum number of partitions to include in a repair +assignment. Used to reduce number of partitions present in merkle tree leaf nodes to avoid overstreaming. +| max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment. 
+This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables +together if they exceed other configuration parameters like `bytes_per_assignment` or `partitions_per_assignment`. +|=== + +==== Full & Preview Repaired repair defaults + +The following `parameters` defaults are established for both `full` and `preview_repaired` repair scheduling: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| bytes_per_assignment | 50GiB | The target and maximum amount of *compressed* bytes that should be included in a +repair assignment. *Note*: For full and preview_repaired, only the portion of an SSTable that covers the ranges +being repaired are accounted for in this calculation. +| max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual +schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the +impact of repairs is causing too much load on the cluster or increase it if writes outpace the amount of data being +repaired. Alternatively, adjust the `min_repair_interval`. This is set to a large value for full repair to attempt to +repair all data per repair schedule. +|=== + +==== Incremental repair defaults + +The following `parameters` defaults are established for `incremental` repair scheduling: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| bytes_per_assignment | 50GiB | The target and maximum amount of *compressed* bytes that should be +included in a repair assignment. *Note*: For incremental repair, the *entire size* of *unrepaired* SSTables +including ranges being repaired are accounted for in this calculation. This is to account for the anticompaction +work required to split the candidate data to repair from the data that won't be repaired. +| max_bytes_per_schedule | 100GiB | The maximum number of bytes to cover in an individual schedule. 
+Consider increasing if more data is written than this limit within the `min_repair_interval`. +|=== + +=== `FixedSplitTokenRangeSplitter` configuration +[#fixed-split-token-range-splitter] + +`FixedSplitTokenRangeSplitter` is a simpler implementation of `IAutoRepairTokenRangeSplitter` that creates repair +assignments by splitting a node's token ranges into an even number of splits. + +The following `parameters` apply for `FixedSplitTokenRangeSplitter` configuration: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| number_of_subranges | 64 | Number of evenly split subranges to create for each node that repair runs for. +If vnodes are configured using `num_tokens`, attempts to evenly subdivide subranges by each range. For example, for +`num_tokens: 16` and `number_of_subranges: 64`, 4 (64/16) repair assignments will be created for each token range. At +least one repair assignment will be created for each token range. +|=== + +=== Other cassandra.yaml Considerations + +==== Enable `reject_repair_compaction_threshold` + +When enabling auto_repair, it is advisable to configure the top level `reject_repair_compaction_threshold` +configuration in cassandra.yaml as a backpressure mechanism to reject new repairs on instances that have many +pending compactions. + +==== Tune `incremental_repair_disk_headroom_reject_ratio` + +By default, incremental repairs will be rejected if less than 20% of disk is available. If one wishes to be +conservative this top level configuration could be increased to a larger value to prevent filling your data directories. 
+ +== Table configuration + +If auto repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table +level, e.g.: + +[source,cql] +---- +ALTER TABLE cycling.cyclist_races +WITH auto_repair = {'incremental_enabled': 'false', 'priority': '0'}; +---- + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| priority | 0 | Indicates the priority this table should be given when issuing repairs. The higher the number, +the more priority will be given to repair the table (e.g. 3 will be repaired before 2). When `repair_by_keyspace` is +set to `true`, tables sharing the same priority may be grouped in the same repair assignment. +| full_enabled | true | Whether full repair is enabled for this table. If full.enabled is not true in cassandra.yaml +this will not be evaluated. +| incremental_enabled | true | Whether incremental repair is enabled for this table. If incremental.enabled is not +true in cassandra.yaml this will not be evaluated. +| preview_repaired_enabled | true | Whether preview repair is enabled for this table. If preview_repaired.enabled is +not true in cassandra.yaml this will not be evaluated. +|=== + +== Nodetool Configuration +=== nodetool getautorepairconfig + +Retrieves the runtime configuration of Auto Repair for the targeted node. 
+ +[source,none] +---- +$> nodetool getautorepairconfig +repair scheduler configuration: + repair_check_interval: 5m + repair_max_retries: 3 + repair_retry_backoff: 30s + repair_task_min_duration: 5s + history_clear_delete_hosts_buffer_interval: 2h +configuration for repair_type: full + enabled: true + min_repair_interval: 24h + repair_by_keyspace: true + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + materialized_view_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100000GiB +configuration for repair_type: incremental + enabled: true + min_repair_interval: 1h + repair_by_keyspace: true + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + materialized_view_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100GiB +configuration for repair_type: preview_repaired + enabled: false +---- + +=== nodetool autorepairstatus + +Provides currently running Auto Repair status. 
+ +[source,none] +---- +$> nodetool autorepairstatus -t incremental +Active Repairs +425cea55-09aa-46e0-8911-9f37a4424574 + + +$> nodetool autorepairstatus -t full +Active Repairs +NONE + +---- + +=== nodetool setautorepairconfig + +Dynamic configuration changes can be made by using `setautorepairconfig`. Note that this only applies on the node being +targeted and these changes are not retained when a node is bounced. + +The following disables the `incremental` repair schedule: + +[source,none] +---- +$> nodetool setautorepairconfig -t incremental enabled false +---- + +The following adjusts the `min_repair_interval` option to `5d` specifically for the `full` repair schedule: + +[source,none] +---- +$> nodetool setautorepairconfig -t full min_repair_interval 5d +---- + +The following configures the `bytes_per_assignment` parameter for `incremental` repair's `token_range_splitter` to +`10GiB`: + +[source,none] +---- +$> nodetool setautorepairconfig -t incremental token_range_splitter.bytes_per_assignment 10GiB +---- + +==== More details +https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution[CEP-37] diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc index 9e0dcb6f7879..48b1a9b62e75 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc @@ -24,7 +24,6 @@ This approach is used instead of removing values because of the distributed natu Once an object is marked as a tombstone, queries will ignore all values that are time-stamped previous to the tombstone insertion. == Zombies - In a multi-node cluster, {cassandra} may store replicas of the same data on two or more nodes. This helps prevent data loss, but it complicates the deletion process. 
If a node receives a delete command for data it stores locally, the node tombstones the specified object and tries to pass the tombstone to other nodes containing replicas of that object. @@ -50,7 +49,7 @@ But if the node does not recover until after the grace period ends, {cassandra} After the tombstone's grace period ends, {cassandra} deletes the tombstone during compaction. -== Deletion +== Deletion After `gc_grace_seconds` has expired the tombstone may be removed (meaning there will no longer be any object that a certain piece of data was deleted). diff --git a/doc/modules/cassandra/pages/managing/operating/index.adoc b/doc/modules/cassandra/pages/managing/operating/index.adoc index 39dd508c4593..492af4dfec3b 100644 --- a/doc/modules/cassandra/pages/managing/operating/index.adoc +++ b/doc/modules/cassandra/pages/managing/operating/index.adoc @@ -14,7 +14,8 @@ * xref:cassandra:managing/operating/metrics.adoc[Monitoring metrics] * xref:cassandra:managing/operating/repair.adoc[Repair] * xref:cassandra:managing/operating/read_repair.adoc[Read repair] +* xref:cassandra:managing/operating/auto_repair.adoc[Auto Repair] * xref:cassandra:managing/operating/security.adoc[Security] * xref:cassandra:managing/operating/topo_changes.adoc[Topology changes] * xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] -* xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] \ No newline at end of file +* xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] diff --git a/doc/modules/cassandra/pages/managing/operating/repair.adoc b/doc/modules/cassandra/pages/managing/operating/repair.adoc index 1823a6d4ef95..1d0cc977faee 100644 --- a/doc/modules/cassandra/pages/managing/operating/repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/repair.adoc @@ -29,10 +29,18 @@ for syncing up missed writes, but it doesn't protect against things like disk corruption, data loss by operator error, or bugs in Cassandra. 
For this reason, full repairs should still be run occasionally. -== Usage and Best Practices +== Automated Repair Scheduling -Since repair can result in a lot of disk and network io, it's not run -automatically by Cassandra. It is run by the operator via nodetool. +Since repair can result in a lot of disk and network io, it has +traditionally not been run automatically by Cassandra. + +In the latest version of Cassandra, a new feature called +xref:managing/operating/auto_repair.adoc[auto repair] was introduced to +allow Cassandra to run repairs automatically on a schedule. + +== Submitting Repairs Using Nodetool + +Repairs can also be run by the operator via nodetool. Incremental repair is the default and is run with the following command: @@ -63,7 +71,7 @@ nodetool repair [options] ---- -The repair command repairs token ranges only on the node being repaired; it does not repair the whole cluster. +The repair command repairs token ranges only on the node being repaired; it does not repair the whole cluster. By default, repair operates on all token ranges replicated by the node on which repair is run, causing duplicate work when running it on every node. Avoid duplicate work by using the `-pr` flag to repair only the "primary" ranges on a node. Do a full cluster repair by running the `nodetool repair -pr` command on each node in each datacenter in the cluster, until all of the nodes and datacenters are repaired. 
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index ae3a70476993..c352c48b1140 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -412,14 +412,6 @@ private static Map initializeDefaultOption options.put(RepairType.INCREMENTAL, getDefaultOptions()); options.put(RepairType.PREVIEW_REPAIRED, getDefaultOptions()); - - options.get(RepairType.FULL).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); - // Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps - // the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent - // increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. - options.get(RepairType.INCREMENTAL).min_repair_interval = new DurationSpec.IntSecondsBound("1h"); - options.get(RepairType.PREVIEW_REPAIRED).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); - return options; } @@ -461,6 +453,7 @@ protected static Options getDefaultOptions() opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); + opts.min_repair_interval = new DurationSpec.IntSecondsBound("24h"); return opts; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index d746d81b71f6..ea0973729873 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -65,104 +65,27 @@ import 
static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; /** - * In Apache Cassandra, tuning repair ranges has four main goals: + * The default implementation of {@link IAutoRepairTokenRangeSplitter} that attempts to: *

          - *
        1. - * Create smaller, consistent repair times: Long repairs, such as those lasting 15 hours, can be problematic. - * If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact - * of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration - * becoming the main bottleneck. - *
        2. - *
        3. - * Minimize the impact on hosts: Repairs should not heavily affect the host systems. For incremental repairs, - * this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide - * partitions—can lead to issues with disk usage and higher compaction costs. - *
        4. - *
        5. - * Reduce overstreaming: The Merkle tree, which represents data within each partition and range, - * has a maximum size. If a repair covers too many partitions, the tree’s leaves represent larger data ranges. - * Even a small change in a leaf can trigger excessive data streaming, making the process inefficient. - *
        6. - *
        7. - * Reduce number of repairs: If there are many small tables, it's beneficial to batch these tables together - * under a single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when - * dealing with hundreds of tables. Running individual repairs for each table can significantly impact performance - * and efficiency. - *
        8. + *
        9. Create smaller, consistent repair times
        10. + *
        11. Minimize the impact on hosts
        12. + *
        13. Reduce overstreaming
        14. + *
        15. Reduce number of repairs
        16. *
        - * To manage these issues, the strategy involves estimating the size and number of partitions within a range and - * splitting it accordingly to bound the size of the range splits. This is established by iterating over SSTable - * index files to estimate the amount of bytes and partitions involved in the ranges being repaired and by what - * repair type is being invoked. - *

        + *

        + * To achieve these goals, this implementation inspects SSTable metadata to estimate the bytes and number of partitions + * within a range and splits it accordingly to bound the size of the token ranges used for repair assignments. + *

        + *

        + * Refer to + * Auto Repair documentation for this implementation + * for a more thorough breakdown of this implementation. + *

        + *

        * While this splitter has a lot of tuning parameters, the expectation is that the established default configuration * shall be sensible for all {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}'s. The following * configuration parameters are offered. - *

          - *
        • - * bytes_per_assigment: The target and maximum amount of bytes that should be included in a repair - * assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this - * involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being - * repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction - * that is expected. For all other repair types, this involves the amount of data covering the range being - * repaired. - *
        • - *
        • - * partitions_per_assignment: The maximum number of partitions that should be included in a repair - * assignment. This configuration exists to reduce excessive overstreaming by attempting to limit the number - * of partitions present in a merkle tree leaf node. - *
        • - *
        • - * max_tables_per_assignment: The maximum number of tables that can be included in a repair assignment. - * This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for - * a keyspace. Note that the splitter will avoid batching tables together if they exceed the other - * configuration parameters such as bytes_per_assignment and partitions_per_assignment. - *
        • - *
        • - * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves - * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to - * reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if - * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up - * the min_repair_interval. - *
        • - *
        - * Given the impact of what each repair type accomplishes, different defaults are established per repair type. - *
          - *
        • - * full: Configured in a way that attempts to accomplish repairing all data in a schedule, with - * individual repairs targeting at most 200GiB of data and 1048576 partitions. - * max_bytes_per_schedule is set to a large value for full repair to attempt to repair all data per - * repair schedule. - *
            - *
          • bytes_per_assignment: 200GiB
          • - *
          • partitions_per_assignment: 1048576
          • - *
          • max_tables_per_assignment: 64
          • - *
          • max_bytes_per_schedule: 100TiB
          • - *
          - *
        • - *
        • - * incremental: Configured in a way that attempts to repair 50GiB of data per repair, and 100GiB per - * schedule. This attempts to throttle the amount of IR and anticompaction done per schedule after turning - * incremental on for the first time. You may want to consider increasing max_bytes_per_schedule - * more than this much data is written per min_repair_interval. - *
            - *
          • bytes_per_assignment: 50GiB
          • - *
          • partitions_per_assignment: 1048576
          • - *
          • max_tables_per_assignment: 64
          • - *
          • max_bytes_per_schedule: 100GiB
          • - *
          - *
        • - *
        • - * preview_repaired: Configured in a way that attempts to accomplish previewing all data in a schedule, - * with previews targeting at most 200GiB of data and 1048576 partitions. - *
            - *
          • bytes_per_assignment: 200GiB
          • - *
          • partitions_per_assignment: 1048576
          • - *
          • max_tables_per_assignment: 64
          • - *
          • max_bytes_per_schedule: 100TiB
          • - *
          - *
        • - *
        + *

        */ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { @@ -171,9 +94,25 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter // Default max bytes to 100TiB, which is much more readable than Long.MAX_VALUE private static final DataStorageSpec.LongBytesBound MAX_BYTES = new DataStorageSpec.LongBytesBound(100_000, DataStorageSpec.DataStorageUnit.GIBIBYTES); + /** + * The target bytes that should be included in a repair assignment + */ static final String BYTES_PER_ASSIGNMENT = "bytes_per_assignment"; + + /** + * Maximum number of partitions to include in a repair assignment + */ static final String PARTITIONS_PER_ASSIGNMENT = "partitions_per_assignment"; + + /** + * Maximum number of tables to include in a repair assignment if {@link AutoRepairConfig.Options#repair_by_keyspace} + * is enabled. + */ static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; + + /** + * The maximum number of bytes to cover in an individual schedule + */ static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; static final List PARAMETERS = Arrays.asList(BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE); @@ -193,24 +132,23 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter *

        * Defaults if not specified for the given repair type: *

      • - *
          bytes_per_assignment: 200GiB
        - *
          partitions_per_assignment: 2^repair_session_max_tree_depth
        + *
          bytes_per_assignment: 50GiB
        + *
          partitions_per_assignment: 1048576 (2^20)
        *
          max_tables_per_assignment: 64
        *
          max_bytes_per_schedule: 1000GiB
        *
      • - * It's expected that these defaults should work well for everything except incremental, so we confine - * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 100GiB. This should strike a good balance - * between the amount of data that will be repaired during an initial migration to incremental repair and should - * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of - * data is written to a node per min_repair_interval. + * It's expected that these defaults should work well for everything except incremental, where we confine + * max_bytes_per_schedule to 100GiB. This should strike a good balance between the amount of data that will be + * repaired during an initial migration to incremental repair and should move the entire repaired set from + * unrepaired to repaired at steady state, assuming no more than 100GiB of data is written to a node per + * min_repair_interval. */ private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); - // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. + // Restrict incremental repair to 100GB max bytes per schedule to confine the amount of possible autocompaction. 
put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) - .withBytesPerAssignment(new DataStorageSpec.LongBytesBound("50GiB")) .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("100GiB")) .build()); put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.PREVIEW_REPAIRED) @@ -946,8 +884,8 @@ static RepairTypeDefaultsBuilder builder(AutoRepairConfig.RepairType repairType) static class RepairTypeDefaultsBuilder { private final AutoRepairConfig.RepairType repairType; - private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("200GiB"); - // Aims to target at most 1 partitons per leaf assuming a merkle tree of depth 20 (2^20 = 1,048,576) + private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("50GiB"); + // Aims to target at most 1 partitions per leaf assuming a merkle tree of depth 20 (2^20 = 1,048,576) private long partitionsPerAssignment = 1_048_576; private int maxTablesPerAssignment = 64; private DataStorageSpec.LongBytesBound maxBytesPerSchedule = MAX_BYTES; @@ -957,6 +895,7 @@ private RepairTypeDefaultsBuilder(AutoRepairConfig.RepairType repairType) this.repairType = repairType; } + @SuppressWarnings("unused") public RepairTypeDefaultsBuilder withBytesPerAssignment(DataStorageSpec.LongBytesBound bytesPerAssignment) { this.bytesPerAssignment = bytesPerAssignment; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index e8c60f2d9b50..2c3e4401ed9f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -41,7 +41,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static 
org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; /** @@ -437,15 +436,7 @@ public void testDefaultOptions() assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), options.token_range_splitter); assertEquals(new DurationSpec.IntSecondsBound("5m"), options.initial_scheduler_delay); assertEquals(new DurationSpec.IntSecondsBound("3h"), options.repair_session_timeout); - - if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) - { - assertEquals(new DurationSpec.IntSecondsBound("1h"), options.min_repair_interval); - } - else - { - assertEquals(new DurationSpec.IntSecondsBound("24h"), options.min_repair_interval); - } + assertEquals(new DurationSpec.IntSecondsBound("24h"), options.min_repair_interval); } @Test @@ -466,6 +457,6 @@ public void testGlobalOptions() assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), config.global_settings.token_range_splitter); assertEquals(new DurationSpec.IntSecondsBound("5m"), config.global_settings.initial_scheduler_delay); assertEquals(new DurationSpec.IntSecondsBound("3h"), config.global_settings.repair_session_timeout); - assertNull(config.global_settings.min_repair_interval); + assertEquals(new DurationSpec.IntSecondsBound("24h"), config.global_settings.min_repair_interval); } } From f876899359540f13d01d67388406eb6ae12db2c2 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 8 Mar 2025 01:42:01 -0600 Subject: [PATCH 173/257] Revert tombstones.adoc no op changes. 
--- .../pages/managing/operating/compaction/tombstones.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc index 48b1a9b62e75..9e0dcb6f7879 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc @@ -24,6 +24,7 @@ This approach is used instead of removing values because of the distributed natu Once an object is marked as a tombstone, queries will ignore all values that are time-stamped previous to the tombstone insertion. == Zombies + In a multi-node cluster, {cassandra} may store replicas of the same data on two or more nodes. This helps prevent data loss, but it complicates the deletion process. If a node receives a delete command for data it stores locally, the node tombstones the specified object and tries to pass the tombstone to other nodes containing replicas of that object. @@ -49,7 +50,7 @@ But if the node does not recover until after the grace period ends, {cassandra} After the tombstone's grace period ends, {cassandra} deletes the tombstone during compaction. -== Deletion +== Deletion After `gc_grace_seconds` has expired the tombstone may be removed (meaning there will no longer be any object that a certain piece of data was deleted). 
From 9658c032f1bd6e6d217892e84eab197e7110f789 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 8 Mar 2025 02:16:45 -0600 Subject: [PATCH 174/257] Add documentation for reject_repair_compaction_threshold --- conf/cassandra.yaml | 4 ++++ conf/cassandra_latest.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 57f053c7c07a..27b63abe0df3 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,6 +2675,10 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s +# Prevents preparing a repair session or beginning a repair streaming session if pending compactions is over +# the given value. Defaults to disabled. +# reject_repair_compaction_threshold: 1024 + # At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. # if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index e3d0c2e89d23..2b6be317fb3a 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2370,6 +2370,10 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE +# Prevents preparing a repair session or beginning a repair streaming session if pending compactions is over +# the given value. Defaults to disabled. +# reject_repair_compaction_threshold: 1024 + # At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. 
# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) From 76d6018d36f91be3a8bc37c3d5c4bc9b5cb85fe0 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:06:33 -0500 Subject: [PATCH 175/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index ea0973729873..b34f1173c68a 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -147,7 +147,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); - // Restrict incremental repair to 100GB max bytes per schedule to confine the amount of possible autocompaction. + // Restrict incremental repair to 100GiB max bytes per schedule to confine the amount of possible autocompaction. 
put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("100GiB")) .build()); From bc0ee37b2cb7f93c89817602b58e35fa955d6cd3 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:08:44 -0500 Subject: [PATCH 176/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 27b63abe0df3..92ccc3d05a2c 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2708,7 +2708,7 @@ storage_compatibility_mode: NONE # # round on all nodes in less than this duration, it will not start a new repair round on a given node until # # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce # # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than -# # gc_grace_seconds to avoid zombies. +# # gc_grace_seconds to avoid potential data resurrection. # min_repair_interval: 24h # token_range_splitter: # # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges From 5799849757941b72777564ad751525bcf68e76b0 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:09:14 -0500 Subject: [PATCH 177/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 92ccc3d05a2c..1cba28c6d68c 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2744,7 +2744,7 @@ storage_compatibility_mode: NONE # # so a more frequent schedule such as 1h is recommended. 
# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is adviable to # # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental -# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep to unrepaired +# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired # # set small. # min_repair_interval: 24h # token_range_splitter: From 0991fa5499b66492b5382390f4c3ab547e2f9c39 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:10:12 -0500 Subject: [PATCH 178/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 1cba28c6d68c..bc135e894541 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2753,7 +2753,7 @@ storage_compatibility_mode: NONE # # This throttles the amount of incremental repair and anticompaction done per schedule after incremental # # repairs are turned on. # bytes_per_assignment: 50GiB -# # Resricts the maximum number of bytes to cover in an individual schedule to 100GiB. +# # Resricts the maximum number of bytes to cover in an individual schedule to the configured max_bytes_per_schedule value (defaults to 100GiB). # # Consider increasing this if more data is written than this limit within the min_repair_interval. 
# max_bytes_per_schedule: 100GiB # preview_repaired: From 85f09caae5bd8aff31557b9bf52b410d82cd175d Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:10:24 -0500 Subject: [PATCH 179/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index bc135e894541..935876cf90fa 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2754,7 +2754,7 @@ storage_compatibility_mode: NONE # # repairs are turned on. # bytes_per_assignment: 50GiB # # Resricts the maximum number of bytes to cover in an individual schedule to the configured max_bytes_per_schedule value (defaults to 100GiB). -# # Consider increasing this if more data is written than this limit within the min_repair_interval. +# # Consider increasing this value if more data is written than this limit within the min_repair_interval. 
# max_bytes_per_schedule: 100GiB # preview_repaired: # # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired From ccdc23da8be4061acc5ae77406377421d34a3511 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 12:27:17 -0500 Subject: [PATCH 180/257] Doc and conf fixes, initial IR enablement guidance --- conf/cassandra.yaml | 18 ++--- conf/cassandra_latest.yaml | 21 +++--- .../pages/managing/operating/auto_repair.adoc | 71 ++++++++++++++----- .../pages/managing/operating/repair.adoc | 5 +- 4 files changed, 76 insertions(+), 39 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 935876cf90fa..f8574dad806c 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2726,9 +2726,9 @@ storage_compatibility_mode: NONE # # parameters: # # param_key1: param_value1 # parameters: -# # The target and maximum amount of bytes that should be included in a repair -# # assignment. This scopes the amount of work involved in a repair and includes -# # the data covering the range being repaired. +# # The target and maximum amount of compressed bytes that should be included in a repair assignment. +# # This scopes the amount of work involved in a repair and includes the data covering the range being +# # repaired. # bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this @@ -2742,18 +2742,18 @@ storage_compatibility_mode: NONE # # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair # # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, # # so a more frequent schedule such as 1h is recommended. 
-# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is adviable to -# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental -# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired -# # set small. +# # NOTE: Please consult +# # https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html#enabling-ir +# # for guidance on enabling incremental repair on an existing cluster. # min_repair_interval: 24h # token_range_splitter: # parameters: -# # Configured to attempt repairing 50GiB of data per repair. +# # Configured to attempt repairing 50GiB of compressed data per repair. # # This throttles the amount of incremental repair and anticompaction done per schedule after incremental # # repairs are turned on. # bytes_per_assignment: 50GiB -# # Resricts the maximum number of bytes to cover in an individual schedule to the configured max_bytes_per_schedule value (defaults to 100GiB). +# # Restricts the maximum number of bytes to cover in an individual schedule to the configured +# # max_bytes_per_schedule value (defaults to 100GiB for incremental). # # Consider increasing this value if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 2b6be317fb3a..5f754a997565 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2403,7 +2403,7 @@ storage_compatibility_mode: NONE # # round on all nodes in less than this duration, it will not start a new repair round on a given node until # # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce # # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than -# # gc_grace_seconds to avoid zombies.
+# # gc_grace_seconds to avoid potential data resurrection. # min_repair_interval: 24h # token_range_splitter: # # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges @@ -2421,9 +2421,9 @@ storage_compatibility_mode: NONE # # parameters: # # param_key1: param_value1 # parameters: -# # The target and maximum amount of bytes that should be included in a repair -# # assignment. This scopes the amount of work involved in a repair and includes -# # the data covering the range being repaired. +# # The target and maximum amount of compressed bytes that should be included in a repair assignment. +# # This scopes the amount of work involved in a repair and includes the data covering the range being +# # repaired. # bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this @@ -2438,19 +2438,16 @@ storage_compatibility_mode: NONE # # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair # # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, # # so a more frequent schedule such as 1h is recommended. -# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is adviable to -# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental -# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep to unrepaired -# # set small. -# min_repair_interval: 24h +# min_repair_interval: 1h # token_range_splitter: # parameters: -# # Configured to attempt repairing 50GiB of data per repair. +# # Configured to attempt repairing 50GiB of compressed data per repair. # # This throttles the amount of incremental repair and anticompaction done per schedule after incremental # # repairs are turned on. 
# bytes_per_assignment: 50GiB -# # Resricts the maximum number of bytes to cover in an individual schedule to 100GiB. -# # Consider increasing this if more data is written than this limit within the min_repair_interval. +# # Restricts the maximum number of bytes to cover in an individual schedule to the configured +# # max_bytes_per_schedule value (defaults to 100GiB for incremental). +# # Consider increasing this value if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: # # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index db50a8c4524b..e82cb95d1223 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -38,7 +38,7 @@ xref:#fixed-split-token-range-splitter[FixedSplitTokenRangeSplitter]. == Considerations -Before enabling auto repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base +Before enabling Auto Repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base understanding of repairs. === Full Repair @@ -59,11 +59,48 @@ be eventually purged. Running incremental repair more frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a shorter `min_repair_interval` such as `1h` is recommended for new clusters. -NOTE: When turning on incremental repair for the first time on an existing cluser with a large amount of data it is -advisable to use a larger interval such as `24h` or longer to reduce the impact of anticompaction caused by incremental -repair. Once the entire token range has been repaired, one may consider reducing this to a shorter interval. 
It is -strongly recommended to not enable incremental repair for an existing cluster unless full repair has previously -actively run. +==== Enabling Incremental Repair on existing clusters with a large amount of data +[#enabling-ir] +One should be careful when enabling incremental repair on a cluster for the first time. While +xref:#repair-token-range-splitter[RepairTokenRangeSplitter] includes a default configuration to attempt to gracefully +migrate to incremental repair over time, failure to take proper precaution could overwhelm the cluster with +xref:managing/operating/compaction/overview.adoc#types-of-compaction[anticompactions]. + +No matter how one goes about enabling and running incremental repair, it is recommended to run a cycle of full repairs +for the entire cluster as a pre-flight step to running incremental repair. This will put the cluster into a more +consistent state which will reduce the amount of streaming between replicas when incremental repair initially runs. + +If you do not have strong data consistency requirements, one may consider using +xref:managing/tools/sstable/sstablerepairedset.adoc[nodetool sstablerepairedset] to mark all SSTables as repaired +before enabling incremental repair scheduling using Auto Repair. This will reduce the burden of initially running +incremental repair because all existing data will be considered as repaired, so subsequent incremental repairs will +only run against new data. + +If you do have strong data consistency requirements, then one must treat all data as initially unrepaired and run +incremental repair against it. Consult +xref:#incremental-repair-defaults[RepairTokenRangeSplitter's Incremental repair defaults].
+ +In particular one should be mindful of the xref:managing/operating/compaction/overview.adoc[compaction strategy] +you use for your tables and how it might impact incremental repair before running incremental repair for the first +time: + +- *Large SSTables*: When using xref:managing/operating/compaction/stcs.adoc[SizeTieredCompactionStrategy] or any + compaction strategy which can create large SSTables including many partitions, the amount of + xref:managing/operating/compaction/overview.adoc#types-of-compaction[anticompaction] that might be required could be + excessive. Using a small `bytes_per_assignment` might contribute to repeated anticompactions over the same + unrepaired data. +- *Partitions overlapping many SSTables*: If partitions overlap between many SSTables, the number of SSTables included + in a repair might be large. Therefore it is important to consider that many SSTables may be included in a repair + session and must all be anticompacted. xref:managing/operating/compaction/lcs.adoc[LeveledCompactionStrategy] is less + susceptible to this issue as it prevents overlapping of partitions within levels outside of L0, but if SSTables + start accumulating in L0 between incremental repairs, the cost of anticompaction will increase. + xref:managing/operating/compaction/ucs.adoc#sharding[UnifiedCompactionStrategy's sharding] can also be used to avoid + partitions overlapping SSTables. + +The xref:#repair-token-range-splitter[token_range_splitter] configuration for incremental repair includes a default +configuration that attempts to conservatively migrate 100GiB of compressed data every day per node. Depending on +requirements, data set and capability of a cluster's hardware, one may consider tuning these values to be more +aggressive or conservative.
=== Previewing Repaired Data @@ -80,11 +117,11 @@ When enabled, the `BytesPreviewedDesynchronized` and `TokenRangesPreviewedDesync xref:cassandra:managing/operating/metrics.adoc#table-metrics[table metrics] can be used to detect inconsistencies in the repaired data set. -== Configuring auto repair in cassandra.yaml +== Configuring Auto Repair in cassandra.yaml -Configuration for auto repair is managed in the `cassandra.yaml` file by the `auto_repair` property. +Configuration for Auto Repair is managed in the `cassandra.yaml` file by the `auto_repair` property. -A rich set of configuration exists for configuring auto repair with sensible defaults. However, the expectation +A rich set of configuration exists for configuring Auto Repair with sensible defaults. However, the expectation is that some tuning might be needed particulary when it comes to tuning how often repair should run (`min_repair_interval`) and how repair assignments as created (`token_range_splitter`). @@ -148,9 +185,9 @@ type by using `repair_type_overrides`. such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one round on all nodes in less than this duration, it will not start a new repair round on a given node until this much time has passed since the last repair completed. Consider increasing to a larger value to reduce the impact of repairs, -however *note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to -avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[zombies]*. -| class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of +however note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to +avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[data resurrection]. 
+| token_range_splitter.class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of IAutoRepairTokenRangeSplitter to use; responsible for splitting token ranges for repair assignments. Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter,FixedTokenRangeSplitter}. | repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, @@ -183,20 +220,20 @@ in nodetool repair. General advice is to keep this true. `RepairTokenRangeSplitter` is the default implementation of `IAutoRepairTokenRangeSplitter` that attempts to create token range assignments meeting the following goals: -1. *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic. If a +- *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic. If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration becoming the main bottleneck. -2. *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this +- *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide partitions can lead to issues with disk usage and higher compaction costs. -3. *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. +- *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. If a repair covers too many partitions, the tree’s leaves represent larger data ranges. 
Even a small change in a leaf can trigger excessive data streaming, making the process inefficient. -4. *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a +- *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when dealing with hundreds of tables. Running individual repairs for each table can significantly impact performance and efficiency. @@ -281,7 +318,7 @@ conservative this top level configuration could be increased to a larger value t == Table configuration -If auto repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table +If Auto Repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table level, e.g.: [source,cql] diff --git a/doc/modules/cassandra/pages/managing/operating/repair.adoc b/doc/modules/cassandra/pages/managing/operating/repair.adoc index 1d0cc977faee..d7eaba171125 100644 --- a/doc/modules/cassandra/pages/managing/operating/repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/repair.adoc @@ -36,7 +36,10 @@ traditionally not been run automatically by Cassandra. In the latest version of Cassandra, a new feature called xref:managing/operating/auto_repair.adoc[auto repair] was introduced to -allow Cassandra to run repairs automatically on a schedule. +allow Cassandra to submit and manage repairs automatically on a schedule. + +The introduction of this feature does not interfere with existing repair +functionality enabled via nodetool. 
== Submitting Repairs Using Nodetool From 3cd0c85dab2a24b08115b05c9ee220b2dff2351d Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 13:22:35 -0500 Subject: [PATCH 181/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index b34f1173c68a..ace01ff514e2 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -106,7 +106,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter /** * Maximum number of tables to include in a repair assignment if {@link AutoRepairConfig.Options#repair_by_keyspace} - * is enabled. 
+ * is enabled */ static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; From f6ef769c5b7aa9bb18903672b2ea45319104b19b Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 13:22:51 -0500 Subject: [PATCH 182/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index ace01ff514e2..a743df946e36 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -137,7 +137,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter *
          max_tables_per_assignment: 64
        *
          max_bytes_per_schedule: 1000GiB
        * - * It's expected that these defaults should work well for everything except incremental, where we + * It's expected that these defaults should work well for everything except incremental, where we set * max_bytes_per_schedule to 100GiB. This should strike a good balance between the amount of data that will be * repaired during an initial migration to incremental repair and should move the entire repaired set from * unrepaired to repaired at steady state, assuming not more the 100GiB of data is written to a node per From f6399d3bf87183a87b047389689a468beb1896e6 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Mar 2025 17:25:12 -0500 Subject: [PATCH 183/257] Update num_of_subranges --- .../cassandra/pages/managing/operating/auto_repair.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index e82cb95d1223..c456536d905c 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -297,9 +297,9 @@ The following `parameters` apply for `FixedSplitTokenRangeSplitter` configuratio [cols=",,",options="header",] |=== | Name | Default | Description -| number_of_subranges | 64 | Number of evenly split subranges to create for each node that repair runs for. +| number_of_subranges | 32 | Number of evenly split subranges to create for each node that repair runs for. If vnodes are configured using `num_tokens`, attempts to evenly subdivide subranges by each range. For example, for -`num_tokens: 16` and `number_of_subranges: 64`, 4 (64/16) repair assignments will be created for each token range. At +`num_tokens: 16` and `number_of_subranges: 32`, 2 (32/16) repair assignments will be created for each token range. At least one repair assignment will be created for each token range. 
|=== From a1fc22858f0ef21042fa77091435b7ed61c3eb51 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Tue, 11 Mar 2025 17:06:07 -0700 Subject: [PATCH 184/257] Move auto-repair retry policy config to repair-type level --- .../repair/autorepair/AutoRepair.java | 8 ++-- .../repair/autorepair/AutoRepairConfig.java | 48 +++++++++---------- .../cassandra/service/AutoRepairService.java | 28 +++++------ .../service/AutoRepairServiceMBean.java | 7 ++- .../org/apache/cassandra/tools/NodeProbe.java | 20 ++++---- .../tools/nodetool/SetAutoRepairConfig.java | 12 ++--- .../AutoRepairParameterizedTest.java | 12 ++--- .../service/AutoRepairServiceBasicTest.java | 8 ++-- .../nodetool/SetAutoRepairConfigTest.java | 24 ++-------- 9 files changed, 74 insertions(+), 93 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 07fb4793c51e..7238d14acdf6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -313,7 +313,7 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim boolean success = false; int retryCount = 0; Future f = null; - while (retryCount <= config.getRepairMaxRetries()) + while (retryCount <= config.getRepairMaxRetries(repairType)) { RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, Lists.newArrayList(curRepairAssignment.getTableNames()), @@ -336,14 +336,14 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim { break; } - else if (retryCount < config.getRepairMaxRetries()) + else if (retryCount < config.getRepairMaxRetries(repairType)) { boolean cancellationStatus = f.cancel(true); logger.warn("Repair failed for range {}-{} for {} tables {} with cancellationStatus: {} retrying after {} seconds...", tokenRange.left, tokenRange.right, keyspaceName, curRepairAssignment.getTableNames(), - 
cancellationStatus, config.getRepairRetryBackoff().toSeconds()); - sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); + cancellationStatus, config.getRepairRetryBackoff(repairType).toSeconds()); + sleepFunc.accept(config.getRepairRetryBackoff(repairType).toSeconds(), TimeUnit.SECONDS); } retryCount++; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index c352c48b1140..a23cd3021232 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -56,10 +56,6 @@ public class AutoRepairConfig implements Serializable // The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata // for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); - // Maximum number of retries for a repair session. - public volatile Integer repair_max_retries = 3; - // Backoff time before retrying a repair session. - public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); // Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming // the node by scheduling too many repair tasks in a short period of time. 
public volatile DurationSpec.LongSecondsBound repair_task_min_duration = new DurationSpec.LongSecondsBound("5s"); @@ -169,26 +165,6 @@ public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); } - public int getRepairMaxRetries() - { - return repair_max_retries; - } - - public void setRepairMaxRetries(int maxRetries) - { - repair_max_retries = maxRetries; - } - - public DurationSpec.LongSecondsBound getRepairRetryBackoff() - { - return repair_retry_backoff; - } - - public void setRepairRetryBackoff(String interval) - { - repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); - } - public DurationSpec.LongSecondsBound getRepairTaskMinDuration() { return repair_task_min_duration; @@ -360,6 +336,26 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT getOptions(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); } + public int getRepairMaxRetries(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_max_retries); + } + + public void setRepairMaxRetries(RepairType repairType, int maxRetries) + { + getOptions(repairType).repair_max_retries = maxRetries; + } + + public DurationSpec.LongSecondsBound getRepairRetryBackoff(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_retry_backoff); + } + + public void setRepairRetryBackoff(RepairType repairType, String interval) + { + getOptions(repairType).repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); + } + @VisibleForTesting static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException { @@ -505,6 +501,10 @@ protected static Options getDefaultOptions() public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; // Timeout for resuming stuck repair 
sessions. public volatile DurationSpec.IntSecondsBound repair_session_timeout; + // Maximum number of retries for a repair session. + public volatile Integer repair_max_retries = 3; + // Backoff time before retrying a repair session. + public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); public String toString() { diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 0cf744296e07..9f66a92abc95 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -100,8 +100,6 @@ public String getAutoRepairConfiguration() StringBuilder sb = new StringBuilder(); sb.append("repair scheduler configuration:"); appendConfig(sb, "repair_check_interval", config.getRepairCheckInterval()); - appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries()); - appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff()); appendConfig(sb, "repair_task_min_duration", config.getRepairTaskMinDuration()); appendConfig(sb, "history_clear_delete_hosts_buffer_interval", config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); for (RepairType repairType : RepairType.values()) @@ -162,18 +160,6 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); } - @Override - public void setAutoRepairMaxRetriesCount(int retries) - { - config.setRepairMaxRetries(retries); - } - - @Override - public void setAutoRepairRetryBackoff(String interval) - { - config.setRepairRetryBackoff(interval); - } - @Override public void setAutoRepairMinRepairTaskDuration(String duration) { @@ -261,6 +247,18 @@ public void setRepairByKeyspace(String repairType, boolean repairByKeyspace) config.setRepairByKeyspace(RepairType.parse(repairType), repairByKeyspace); } + @Override + public 
void setAutoRepairMaxRetriesCount(String repairType, int retries) + { + config.setRepairMaxRetries(RepairType.parse(repairType), retries); + } + + @Override + public void setAutoRepairRetryBackoff(String repairType, String interval) + { + config.setRepairRetryBackoff(RepairType.parse(repairType), interval); + } + private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig config) { StringBuilder sb = new StringBuilder(); @@ -288,6 +286,8 @@ private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig co appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); + appendConfig(sb , "repair_max_retries", config.getRepairMaxRetries(repairType)); + appendConfig(sb , "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); final String splitterClassName = splitterClass.class_name != null ? 
splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index d57329e86795..72d9da7c5a0c 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -39,10 +39,6 @@ public interface AutoRepairServiceMBean public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); - public void setAutoRepairMaxRetriesCount(int retries); - - public void setAutoRepairRetryBackoff(String interval); - public void setAutoRepairMinRepairTaskDuration(String duration); public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold); @@ -71,4 +67,7 @@ public interface AutoRepairServiceMBean public void setRepairByKeyspace(String repairType, boolean repairByKeyspace); + public void setAutoRepairMaxRetriesCount(String repairType, int retries); + + public void setAutoRepairRetryBackoff(String repairType, String interval); } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 723894caa093..bf93bd179d4d 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2610,16 +2610,6 @@ public void startAutoRepairScheduler() autoRepairProxy.startScheduler(); } - public void setAutoRepairMaxRetriesCount(int retries) - { - autoRepairProxy.setAutoRepairMaxRetriesCount(retries); - } - - public void setAutoRepairRetryBackoff(String interval) - { - autoRepairProxy.setAutoRepairRetryBackoff(interval); - } - public void setAutoRepairMinRepairTaskDuration(String duration) { autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); @@ -2684,6 +2674,16 @@ public void setAutoRepairRepairByKeyspace(String repairType, boolean enabled) { 
autoRepairProxy.setRepairByKeyspace(repairType, enabled); } + + public void setAutoRepairMaxRetriesCount(String repairType, int retries) + { + autoRepairProxy.setAutoRepairMaxRetriesCount(repairType, retries); + } + + public void setAutoRepairRetryBackoff(String repairType, String interval) + { + autoRepairProxy.setAutoRepairRetryBackoff(repairType, interval); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 37f809943fb6..5a9a1411bb59 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -87,12 +87,6 @@ public void execute(NodeProbe probe) case "history_clear_delete_hosts_buffer_interval": probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); return; - case "repair_max_retries": - probe.setAutoRepairMaxRetriesCount(Integer.parseInt(paramVal)); - return; - case "repair_retry_backoff": - probe.setAutoRepairRetryBackoff(paramVal); - return; case "min_repair_task_duration": probe.setAutoRepairMinRepairTaskDuration(paramVal); return; @@ -163,6 +157,12 @@ public void execute(NodeProbe probe) case "repair_by_keyspace": probe.setAutoRepairRepairByKeyspace(repairTypeStr, Boolean.parseBoolean(paramVal)); break; + case "repair_max_retries": + probe.setAutoRepairMaxRetriesCount(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "repair_retry_backoff": + probe.setAutoRepairRetryBackoff(repairTypeStr, paramVal); + break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index faa70e3d451f..1cc80791b4e6 100644 --- 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -489,7 +489,7 @@ public void testMetrics() AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMaterializedViewRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); - config.setRepairRetryBackoff("0s"); + config.setRepairRetryBackoff(repairType, "0s"); config.setAutoRepairTableMaxRepairTime(repairType, "0s"); AutoRepair.timeFunc = () -> { timeFuncCalls++; @@ -653,7 +653,7 @@ public void testRepairMaxRetries() AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); - assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); + assertEquals(config.getRepairRetryBackoff(repairType).toSeconds(), (long) duration); }; config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.put(repairType, autoRepairState); @@ -661,7 +661,7 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); // Expect configured retries for each keyspace expected to be repaired - assertEquals(config.getRepairMaxRetries()*expectedRepairAssignments, sleepCalls.get()); + assertEquals(config.getRepairMaxRetries(repairType)*expectedRepairAssignments, sleepCalls.get()); verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, times(1)).setFailedTokenRangesCount(expectedRepairAssignments); @@ -677,7 +677,7 @@ public void testRepairSuccessAfterRetry() AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); - assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); + assertEquals(config.getRepairRetryBackoff(repairType).toSeconds(), (long) duration); }; 
doAnswer(invocation -> { if (sleepCalls.get() == 0) @@ -692,7 +692,7 @@ public void testRepairSuccessAfterRetry() return null; }).when(repairRunnable).addProgressListener(Mockito.any()); config.setRepairMinInterval(repairType, "0s"); - config.setRepairMaxRetries(1); + config.setRepairMaxRetries(repairType, 1); AutoRepair.instance.repairStates.put(repairType, autoRepairState); AutoRepair.instance.repair(repairType); @@ -866,7 +866,7 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() return runnable; }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); when(spyState.getLastRepairTime()).thenReturn((long) 0); - AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(0); + AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(repairType, 0); AutoRepair.instance.repairStates.put(repairType, spyState); AutoRepair.instance.repair(repairType); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index bd6fcd608ba3..07b8bcc69ec3 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -75,17 +75,17 @@ public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() @Test public void testsetAutoRepairMaxRetriesCount() { - autoRepairService.setAutoRepairMaxRetriesCount(101); + autoRepairService.setAutoRepairMaxRetriesCount(AutoRepairConfig.RepairType.INCREMENTAL.name(), 101); - assertEquals(101, config.getRepairMaxRetries()); + assertEquals(101, config.getRepairMaxRetries(AutoRepairConfig.RepairType.INCREMENTAL)); } @Test public void testsetAutoRepairRetryBackoffInSec() { - autoRepairService.setAutoRepairRetryBackoff("102s"); + autoRepairService.setAutoRepairRetryBackoff(AutoRepairConfig.RepairType.INCREMENTAL.name(), "102s"); - assertEquals(102, 
config.getRepairRetryBackoff().toSeconds()); + assertEquals(102, config.getRepairRetryBackoff(AutoRepairConfig.RepairType.INCREMENTAL).toSeconds()); } @Test(expected = ConfigurationException.class) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 5a32c8285d48..4ea9516e8ef8 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -95,26 +95,6 @@ public void testHistoryDeleteHostsClearBufferInSec() verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); } - @Test - public void testRepairMaxRetries() - { - cmd.args = ImmutableList.of("repair_max_retries", "2"); - - cmd.execute(probe); - - verify(probe, times(1)).setAutoRepairMaxRetriesCount(2); - } - - @Test - public void testRetryBackoffInSec() - { - cmd.args = ImmutableList.of("repair_retry_backoff", "3s"); - - cmd.execute(probe); - - verify(probe, times(1)).setAutoRepairRetryBackoff("3s"); - } - @Test public void testStartScheduler() { @@ -286,7 +266,9 @@ public static Collection testCases() forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairPercentage(type.name(), 7)), forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairMaterializedViewRepairEnabled(type.name(), true)), forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type.name(), ImmutableSet.of("dc1", "dc2"))), - forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")) + forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, 
times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")), + forEachRepairType("repair_max_retries", "3", (type) -> verify(probe, times(1)).setAutoRepairMaxRetriesCount(type.name(), 3)), + forEachRepairType("repair_retry_backoff", "60s", (type) -> verify(probe, times(1)).setAutoRepairRetryBackoff(type.name(), "60s")) ).flatMap(Function.identity()).collect(Collectors.toList()); } From 703427e5fcb6ffc79e0c9b2df7f9a5af581c57d9 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Wed, 12 Mar 2025 20:37:50 -0700 Subject: [PATCH 185/257] Update docs --- conf/cassandra.yaml | 8 ++++---- conf/cassandra_latest.yaml | 8 ++++---- .../pages/managing/operating/auto_repair.adoc | 12 +++++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index f8574dad806c..b40be44daaec 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2768,10 +2768,6 @@ storage_compatibility_mode: NONE # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. # repair_check_interval: 5m -# # Maximum number of retries for a repair session. -# repair_max_retries: 3 -# # Backoff time before retrying a repair session. -# repair_retry_backoff: 30s # # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming # # the node by scheduling too many repair tasks in a short period of time. # repair_task_min_duration: 5s @@ -2813,6 +2809,10 @@ storage_compatibility_mode: NONE # # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults # # to true. General advice is to keep this true. # repair_primary_token_range_only: true +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. 
+# repair_retry_backoff: 30s # token_range_splitter: # # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 5f754a997565..c37ec27c75bf 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2461,10 +2461,6 @@ storage_compatibility_mode: NONE # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. # repair_check_interval: 5m -# # Maximum number of retries for a repair session. -# repair_max_retries: 3 -# # Backoff time before retrying a repair session. -# repair_retry_backoff: 30s # # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming # # the node by scheduling too many repair tasks in a short period of time. # repair_task_min_duration: 5s @@ -2506,6 +2502,10 @@ storage_compatibility_mode: NONE # # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults # # to true. General advice is to keep this true. # repair_primary_token_range_only: true +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. +# repair_retry_backoff: 30s # token_range_splitter: # # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. 
# class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index c456536d905c..a1bf57acf158 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -164,9 +164,6 @@ for each repair type (full, incremental, and preview_repaired), and based on tha | repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule repairs. | repair_max_retries | 3 | Maximum number of retries for a repair session. -| repair_retry_backoff | 30s | Backoff time before retrying a repair session. -| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the -scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. | history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. @@ -212,6 +209,9 @@ centers. Useful if you have keyspaces that are not replicated in certain data ce schedule in certain data centers. | repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. General advice is to keep this true. +| repair_retry_backoff | 30s | Backoff time before retrying a repair session. +| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the +scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. 
|=== === `RepairTokenRangeSplitter` configuration @@ -352,8 +352,6 @@ $> nodetool getautorepairconfig repair scheduler configuration: repair_check_interval: 5m repair_max_retries: 3 - repair_retry_backoff: 30s - repair_task_min_duration: 5s history_clear_delete_hosts_buffer_interval: 2h configuration for repair_type: full enabled: true @@ -370,6 +368,8 @@ configuration for repair_type: full initial_scheduler_delay: 5m repair_session_timeout: 3h force_repair_new_node: false + repair_retry_backoff: 30s + repair_task_min_duration: 5s token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter token_range_splitter.bytes_per_assignment: 50GiB token_range_splitter.partitions_per_assignment: 1048576 @@ -390,6 +390,8 @@ configuration for repair_type: incremental initial_scheduler_delay: 5m repair_session_timeout: 3h force_repair_new_node: false + repair_retry_backoff: 30s + repair_task_min_duration: 5s token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter token_range_splitter.bytes_per_assignment: 50GiB token_range_splitter.partitions_per_assignment: 1048576 From 4f0a40c936c5a20d2a4e9cbfd2efe22862fd1813 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 12 Mar 2025 20:34:43 -0500 Subject: [PATCH 186/257] Avoid returning MY_TURN when replicas are actively taking their turn Previously AutoRepairUtils.myTurnToRunRepair would return MY_TURN if there is capacity to select a new node for repair and the current node has the oldest lastRepairFinishTime. This change adjusts this method to make use of a newly added method getMostEligibleHostToRepair which will identify the node with the oldest lastRepairFinishTime of which none of the node's replicas are actively repairing. This should prevent replicas from actively repairing concurrently, which would increase load on replica sets and also create anticompaction conflicts for incremental repair. 
Behavior is configurable by allow_parallel_replica_repair and allow_parallel_replica_repair_across_schedules configuration. patch by Andy Tolbert, reviewed by Jaydeepkumar Chovatia and Francisco Guerrero for CASSANDRA-20180 --- conf/cassandra.yaml | 9 + conf/cassandra_latest.yaml | 9 + .../pages/managing/operating/auto_repair.adoc | 7 + .../pages/managing/operating/metrics.adoc | 11 +- .../cassandra/metrics/AutoRepairMetrics.java | 39 ++ .../repair/autorepair/AutoRepair.java | 22 +- .../repair/autorepair/AutoRepairConfig.java | 33 ++ .../repair/autorepair/AutoRepairUtils.java | 388 ++++++++++++++++-- .../cassandra/service/AutoRepairService.java | 46 ++- .../service/AutoRepairServiceMBean.java | 4 + .../cassandra/service/StorageService.java | 8 +- .../org/apache/cassandra/tools/NodeProbe.java | 10 + .../tools/nodetool/SetAutoRepairConfig.java | 10 +- ...allelReplicaRepairAcrossSchedulesTest.java | 129 ++++++ .../test/repair/AutoRepairSchedulerTest.java | 113 +++-- .../autorepair/AutoRepairConfigTest.java | 69 +++- .../autorepair/AutoRepairMetricsTest.java | 92 +++++ .../AutoRepairParameterizedTest.java | 2 + .../autorepair/AutoRepairUtilsTest.java | 3 +- 19 files changed, 901 insertions(+), 103 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index b40be44daaec..00b5e6e38ad1 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2787,6 +2787,15 @@ storage_compatibility_mode: NONE # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value # # is used. # parallel_repair_percentage: 3 +# # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. 
+# # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction +# # conflicts while running incremental repair. +# allow_parallel_replica_repair: false +# # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) +# # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will +# # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when +# # allow_parallel_replica_repair is false. +# allow_parallel_replica_repair_across_schedules: true # # Repairs materialized views if true. # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index c37ec27c75bf..befaa7ece68e 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2480,6 +2480,15 @@ storage_compatibility_mode: NONE # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value # # is used. # parallel_repair_percentage: 3 +# # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. +# # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction +# # conflicts while running incremental repair. +# allow_parallel_replica_repair: false +# # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) +# # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will +# # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when +# # allow_parallel_replica_repair is false. 
+# allow_parallel_replica_repair_across_schedules: true # # Repairs materialized views if true. # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index a1bf57acf158..06c0db456753 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -195,6 +195,13 @@ the -j option in nodetool repair. larger value is used. | parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If `parallel_repair_count is set`, the larger value is used. +| allow_parallel_replica_repair | false | Whether to allow a node to take its turn running repair while one or more of +its replicas are running repair. Defaults to false, as running repairs concurrently on replicas can increase load and +also cause anticompaction conflicts while running incremental repair. +| allow_parallel_replica_repair_across_schedules | true | An addition to allow_parallel_replica_repair that also blocks repairs +when replicas (including this node itself) are repairing in any schedule. +For example, if a replica is executing full repairs, a value of false will prevent starting incremental repairs for this +node. Defaults to true and is only evaluated when allow_parallel_replica_repair is false. | materialized_view_repair_enabled | false | Repairs materialized views if true. | initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. 
diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index eb9ce8c8d83f..7294a3675e82 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1114,6 +1114,9 @@ the entire Cassandra cluster in seconds |LongestUnrepairedSec |Gauge |Time since the last repair ran on the node in seconds +|RepairStartLagSec|Gauge |If a repair has not run within min_repair_interval, how long past this value since +repairs last completed. Useful for determining if repairs are behind schedule. + |SucceededTokenRangesCount |Gauge |Number of token ranges successfully repaired on the node |FailedTokenRangesCount |Gauge |Number of token ranges failed to repair on the node @@ -1135,9 +1138,13 @@ the automated repair has been disabled on the node |RepairTurnMyTurnDueToPriority |Counter |Represents the node's turn to repair due to priority set in the automated repair -|RepairTurnMyTurnForceRepair |Counter |Represents the node's turn to repair -due to force repair set in the automated repair +|RepairDelayedByReplica |Counter |Represents occurrences of a node's turn being +delayed because a replica was currently taking its turn. Only relevant if +`allow_parallel_replica_repair` is false. +|RepairDelayedBySchedule |Counter |Represents occurrences of a node's turn being +delayed because it was already being repaired in another schedule. Only relevant +if `allow_parallel_replica_repair_across_schedules` is false. 
|=== diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index ee8a0abfea70..3ef24a9eec1b 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -23,7 +23,9 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.service.AutoRepairService; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; @@ -37,6 +39,7 @@ public class AutoRepairMetrics public final Gauge nodeRepairTimeInSec; public final Gauge clusterRepairTimeInSec; public final Gauge longestUnrepairedSec; + public final Gauge repairStartLagSec; public final Gauge succeededTokenRangesCount; public final Gauge failedTokenRangesCount; public final Gauge skippedTokenRangesCount; @@ -46,8 +49,16 @@ public class AutoRepairMetrics public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; + public Counter repairDelayedByReplica; + public Counter repairDelayedBySchedule; + + private final RepairType repairType; + + private volatile int repairStartLagSecVal; + public AutoRepairMetrics(RepairType repairType) { + this.repairType = repairType; AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); repairsInProgress = Metrics.register(factory.createMetricName("RepairsInProgress"), new Gauge() @@ -98,6 +109,14 @@ public Integer getValue() } }); + repairStartLagSec = Metrics.register(factory.createMetricName("RepairStartLagSec"), new Gauge() + { + public Integer getValue() + { + return repairStartLagSecVal; + } + }); + 
succeededTokenRangesCount = Metrics.register(factory.createMetricName("SucceededTokenRangesCount"), new Gauge() { public Integer getValue() @@ -118,6 +137,9 @@ public Integer getValue() repairTurnMyTurnDueToPriority = Metrics.counter(factory.createMetricName("RepairTurnMyTurnDueToPriority")); repairTurnMyTurnForceRepair = Metrics.counter(factory.createMetricName("RepairTurnMyTurnForceRepair")); + repairDelayedByReplica = Metrics.counter(factory.createMetricName("RepairDelayedByReplica")); + repairDelayedBySchedule = Metrics.counter(factory.createMetricName("RepairDelayedBySchedule")); + totalMVTablesConsideredForRepair = Metrics.register(factory.createMetricName("TotalMVTablesConsideredForRepair"), new Gauge() { public Integer getValue() @@ -151,6 +173,23 @@ public void recordTurn(AutoRepairUtils.RepairTurn turn) default: throw new RuntimeException(String.format("Unrecoginized turn: %s", turn.name())); } + this.repairStartLagSecVal = 0; + } + + /** + * Record perceived lag in scheduling repair. + *

        + * Subtracts the given last repair finish time from the current time. It then subtracts the min repair interval + * for this repair type from that difference and, if the result is greater than 0, records it in seconds (otherwise 0). + */ + public void recordRepairStartLag(long lastFinishTimeInMs) + { + long now = AutoRepair.instance.currentTimeMs(); + long deltaFinish = now - lastFinishTimeInMs; + long deltaMinRepairInterval = deltaFinish - AutoRepairService.instance + .getAutoRepairConfig().getRepairMinInterval(repairType) + .toMilliseconds(); + this.repairStartLagSecVal = deltaMinRepairInterval > 0 ? (int) MILLISECONDS.toSeconds(deltaMinRepairInterval) : 0; } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 7238d14acdf6..51704ce2647b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -154,6 +154,14 @@ public void repairAsync(AutoRepairConfig.RepairType repairType) repairExecutors.get(repairType).submit(() -> repair(repairType)); } + /** + * @return The current observed system time in ms. + */ + public long currentTimeMs() + { + return timeFunc.get(); + } + // repair runs a repair session of the given type synchronously. public void repair(AutoRepairConfig.RepairType repairType) { @@ -179,6 +187,13 @@ public void repair(AutoRepairConfig.RepairType repairType) //consistency level to use for local query UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + + // If it's too soon to run repair, don't bother checking if it's our turn. 
+ if (tooSoonToRunRepair(repairType, repairState, config, myId)) + { + return; + } + RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, myId); if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == MY_TURN_FORCE_REPAIR) { @@ -189,10 +204,6 @@ public void repair(AutoRepairConfig.RepairType repairType) // When doing force repair, we want to repair without -pr. boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) && turn != MY_TURN_FORCE_REPAIR; - if (tooSoonToRunRepair(repairType, repairState, config, myId)) - { - return; - } long startTime = timeFunc.get(); logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, @@ -387,7 +398,8 @@ private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoR // we should check for the node's repair history in the DB repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType, myId)); } - /** check if it is too soon to run repair. one of the reason we + /* + * check if it is too soon to run repair. 
one of the reason we * should not run frequent repair is that repair triggers * memtable flush */ diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index a23cd3021232..739a57e9c746 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -275,6 +275,26 @@ public void setParallelRepairCount(RepairType repairType, int count) getOptions(repairType).parallel_repair_count = count; } + public boolean getAllowParallelReplicaRepair(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.allow_parallel_replica_repair); + } + + public void setAllowParallelReplicaRepair(RepairType repairType, boolean enabled) + { + getOptions(repairType).allow_parallel_replica_repair = enabled; + } + + public boolean getAllowParallelReplicaRepairAcrossSchedules(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.allow_parallel_replica_repair_across_schedules); + } + + public void setAllowParallelReplicaRepairAcrossSchedules(RepairType repairType, boolean enabled) + { + getOptions(repairType).allow_parallel_replica_repair_across_schedules = enabled; + } + public boolean getMaterializedViewRepairEnabled(RepairType repairType) { return applyOverrides(repairType, opt -> opt.materialized_view_repair_enabled); @@ -440,6 +460,8 @@ protected static Options getDefaultOptions() opts.number_of_repair_threads = 1; opts.parallel_repair_count = 3; opts.parallel_repair_percentage = 3; + opts.allow_parallel_replica_repair = false; + opts.allow_parallel_replica_repair_across_schedules = true; opts.sstable_upper_threshold = 10000; opts.ignore_dcs = new HashSet<>(); opts.repair_primary_token_range_only = true; @@ -467,6 +489,15 @@ protected static Options getDefaultOptions() // Percentage of nodes in the cluster running repair in parallel. 
If parallel_repair_count is set, the larger value // is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. public volatile Integer parallel_repair_percentage; + // Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. + // Defaults to false, as running repairs concurrently on replicas can increase load and also cause + // anticompaction conflicts while running incremental repair. + public volatile Boolean allow_parallel_replica_repair; + // An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) + // are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will + // prevent starting incremental repairs for this node. Defaults to true and is only evaluated when + // allow_parallel_replica_repair is false. + public volatile Boolean allow_parallel_replica_repair_across_schedules; // Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good // tables. 
public volatile Integer sstable_upper_threshold; @@ -514,6 +545,8 @@ public String toString() ", number_of_repair_threads=" + number_of_repair_threads + ", parallel_repair_count=" + parallel_repair_count + ", parallel_repair_percentage=" + parallel_repair_percentage + + ", allow_parallel_replica_repair=" + allow_parallel_replica_repair + + ", allow_parallel_replica_repair_across_schedules=" + allow_parallel_replica_repair_across_schedules + ", sstable_upper_threshold=" + sstable_upper_threshold + ", min_repair_interval=" + min_repair_interval + ", ignore_dcs=" + ignore_dcs + diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 19f0558bea64..6da487e5e06b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -21,22 +21,33 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Splitter; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsByRange; +import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.LocalStrategy; import org.slf4j.Logger; @@ -57,6 +68,9 @@ import 
org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.MetaStrategy; import org.apache.cassandra.locator.NetworkTopologyStrategy; +import org.apache.cassandra.locator.RangesAtEndpoint; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; @@ -69,6 +83,7 @@ import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.transport.Dispatcher; @@ -261,9 +276,10 @@ public static class CurrentRepairStatus public Set hostIdsWithOnGoingRepair; // hosts that is running repair public Set hostIdsWithOnGoingForceRepair; // hosts that is running repair because of force repair Set priority; + public AutoRepairHistory myRepairHistory; List historiesWithoutOnGoingRepair; // hosts that is NOT running repair - public CurrentRepairStatus(List repairHistories, Set priority) + public CurrentRepairStatus(List repairHistories, Set priority, UUID myId) { hostIdsWithOnGoingRepair = new HashSet<>(); hostIdsWithOnGoingForceRepair = new HashSet<>(); @@ -286,10 +302,19 @@ public CurrentRepairStatus(List repairHistories, Set pr { historiesWithoutOnGoingRepair.add(history); } + if (history.hostId.equals(myId)) + { + myRepairHistory = history; + } } this.priority = priority; } + public Set getAllHostsWithOngoingRepair() + { + return Sets.union(hostIdsWithOnGoingRepair, hostIdsWithOnGoingForceRepair); + } + public String toString() { return MoreObjects.toStringHelper(this). @@ -297,6 +322,7 @@ public String toString() add("hostIdsWithOnGoingForceRepair", hostIdsWithOnGoingForceRepair). 
add("historiesWithoutOnGoingRepair", historiesWithoutOnGoingRepair). add("priority", priority). + add("myRepairHistory", myRepairHistory). toString(); } } @@ -311,7 +337,7 @@ public static List getAutoRepairHistory(RepairType repairType repairHistoryResult = UntypedResultSet.create(repairStatusRows.result); List repairHistories = new ArrayList<>(); - if (repairHistoryResult.size() > 0) + if (!repairHistoryResult.isEmpty()) { for (UntypedResultSet.Row row : repairHistoryResult) { @@ -323,7 +349,7 @@ public static List getAutoRepairHistory(RepairType repairType long lastRepairFinishTime = row.getLong(COL_REPAIR_FINISH_TS, 0); Set deleteHosts = row.getSet(COL_DELETE_HOSTS, UUIDType.instance); long deleteHostsUpdateTime = row.getLong(COL_DELETE_HOSTS_UPDATE_TIME, 0); - Boolean forceRepair = row.has(COL_FORCE_REPAIR) ? row.getBoolean(COL_FORCE_REPAIR) : false; + boolean forceRepair = row.has(COL_FORCE_REPAIR) && row.getBoolean(COL_FORCE_REPAIR); repairHistories.add(new AutoRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, deleteHosts, deleteHostsUpdateTime, forceRepair)); } @@ -373,12 +399,6 @@ public static void setForceRepair(RepairType repairType, UUID hostId) logger.info("Set force repair repair type: {}, node: {}", repairType, hostId); } - public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) - { - List autoRepairHistories = getAutoRepairHistory(repairType); - return getCurrentRepairStatus(repairType, autoRepairHistories); - } - public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) { ResultMessage.Rows rows = selectLastRepairTimeForNode.execute(QueryState.forInternalCalls(), @@ -395,13 +415,12 @@ public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) return repairTime.one().getLong(COL_REPAIR_FINISH_TS); } - public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories) + @VisibleForTesting + public static 
CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories, UUID myId) { if (autoRepairHistories != null) { - CurrentRepairStatus status = new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType)); - - return status; + return new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType), myId); } return null; } @@ -418,7 +437,8 @@ protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Co logger.info("Ignore node {} because its datacenter is {}", node, nodeDC); continue; } - /** Check if endpoint state exists in gossip or not. If it + /* + * Check if endpoint state exists in gossip or not. If it * does not then this maybe a ghost node so ignore it */ if (Gossiper.instance.isAlive(node.broadcastAddress)) @@ -447,6 +467,262 @@ public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repair return getHostWithLongestUnrepairTime(autoRepairHistories); } + /** + * Convenience method to resolve the broadcast address of a host id from {@link ClusterMetadata} + * @return broadcast address if it exists in CMS, otherwise null. + */ + @Nullable + private static InetAddressAndPort getBroadcastAddress(UUID hostId) + { + Directory directory = ClusterMetadata.current().directory; + + NodeId nodeId = directory.nodeIdFromHostId(hostId); + if (nodeId != null) + { + NodeAddresses nodeAddresses = directory.getNodeAddresses(nodeId); + if (nodeAddresses != null) + { + return nodeAddresses.broadcastAddress; + } + } + return null; + } + + /** + * @return Map of broadcast address to host id, if a broadcast address cannot be found for a host, it is + * not included in the map. 
+ */ + private static Map getBroadcastAddressToHostIdMap(Set hosts) + { + // Get a mapping of endpoint : host id + Map broadcastAddressMap = new HashMap<>(hosts.size()); + for (UUID hostId : hosts) + { + InetAddressAndPort broadcastAddress = getBroadcastAddress(hostId); + if (broadcastAddress == null) + { + logger.warn("Could not resolve broadcast address from host id {} in ClusterMetadata can't accurately " + + "determine if this node is a replica of the local node.", hostId); + } + else + { + broadcastAddressMap.put(broadcastAddress, hostId); + } + } + return broadcastAddressMap; + } + + /** + * @return Mapping of unique replication strategy to keyspaces using that strategy that we care about repairing. + */ + private static Map> getReplicationStrategies() + { + // Collect all unique replication strategies among all keyspaces. + Map> replicationStrategies = new HashMap<>(); + for (Keyspace keyspace : Keyspace.all()) + { + if (AutoRepairUtils.shouldConsiderKeyspace(keyspace)) + { + replicationStrategies.computeIfAbsent(keyspace.getReplicationStrategy(), k -> new ArrayList<>()) + .add(keyspace.getName()); + } + } + return replicationStrategies; + } + + /** + * Collects all hosts being repaired among all active repair schedules and their schedule if + * {@link AutoRepairConfig#getAllowParallelReplicaRepairAcrossSchedules(RepairType)} is false for this repairType. + * Accepts the currently evaluated repairType's schedule as an optimization to avoid grabbing its repair status an + * additional time. + * + * @param myRepairType The repair type schedule being evaluated. + * @param myRepairStatus The repair status for that repair type. + * @return All hosts among active schedules currently being repaired.
+ */ + private static Map getHostsBeingRepaired(RepairType myRepairType, CurrentRepairStatus myRepairStatus) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + + Map hostsBeingRepaired = myRepairStatus.getAllHostsWithOngoingRepair().stream() + .collect(Collectors.toMap((h) -> h, (v) -> myRepairType)); + + // If we don't allow repairing across schedules, iterate over other enabled schedules and include hosts + // actively being repaired. + if (!config.getAllowParallelReplicaRepairAcrossSchedules(myRepairType)) + { + for (RepairType repairType : RepairType.values()) + { + if (myRepairType == repairType) + continue; + + if (config.isAutoRepairEnabled(repairType)) + { + CurrentRepairStatus repairStatus = getCurrentRepairStatus(repairType, getAutoRepairHistory(repairType), null); + if (repairStatus != null) + { + for (UUID hostId : repairStatus.getAllHostsWithOngoingRepair()) + { + hostsBeingRepaired.putIfAbsent(hostId, repairType); + } + } + } + } + } + return hostsBeingRepaired; + } + + /** + * Identifies the most eligible host to repair for nodes preceding or equal to this nodes' lastRepairFinishTime. + * The criteria for this is to find the node with the oldest last repair finish time of which none of its replicas + * are currently under repair. + * @return The most eligible host to repair or null if no candidates before and including this nodes' current repair status. + */ + @VisibleForTesting + public static AutoRepairHistory getMostEligibleHostToRepair(RepairType repairType, CurrentRepairStatus currentRepairStatus, UUID myId) + { + // 0. If this repairType allows parallel replica repair, short circuit and return the host with the longest unrepair time + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (config.getAllowParallelReplicaRepair(repairType)) + { + return getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + } + + // 1. 
Sort repair histories from oldest completed to newest + Stream finishedRepairHistories = currentRepairStatus.historiesWithoutOnGoingRepair + .stream() + .sorted(Comparator.comparingLong(h -> h.lastRepairFinishTime)); + + // 2. Optimization: Truncate repair histories after myId so we don't evaluate anything more recent as if we + // aren't interested in anything that isn't this node. + final AtomicBoolean myHistoryFound = new AtomicBoolean(false); + finishedRepairHistories = finishedRepairHistories.takeWhile((history) -> { + if (myHistoryFound.get()) return false; + + myHistoryFound.set(history.hostId.equals(myId)); + return true; + }); + + // If there are any hosts with ongoing repair, filter the repair histories to not include nodes whose replicas + // are ongoing repair. + Map hostsBeingRepairedToRepairType = getHostsBeingRepaired(repairType, currentRepairStatus); + + // 3. If I am already actively being repaired in another schedule, defer submitting repairs; if already + // repairing for this type, return node so it can take its turn. + RepairType alreadyRepairingType = hostsBeingRepairedToRepairType.get(myId); + if (alreadyRepairingType != null) + { + if (repairType != alreadyRepairingType) + { + logger.info("Deferring repair because I am already actively repairing in schedule {}", hostsBeingRepairedToRepairType.get(myId)); + AutoRepairMetricsManager.getMetrics(repairType).repairDelayedBySchedule.inc(); + return null; + } + else if (currentRepairStatus.myRepairHistory != null) + { + // if the repair type matches this repair, assume the node was restarted while repairing, return node + // so it can take its turn. + logAlreadyMyTurn(); + return currentRepairStatus.myRepairHistory; + } + } + + if (!hostsBeingRepairedToRepairType.isEmpty()) + { + // 4. Extract InetAddresses for each UUID as replicas are identified by their address. + Map hostsBeingRepaired = getBroadcastAddressToHostIdMap(hostsBeingRepairedToRepairType.keySet()); + + // 5. 
Collect unique replication strategies and group them up with their keyspaces. + Map> replicationStrategies = getReplicationStrategies(); + + // 6. Filter out repair histories who have a replica being repaired, note that this is lazy, given the stream + // is completed using findFirst, it should stop as soon as the matching criteria is met. + finishedRepairHistories = finishedRepairHistories.filter((history) -> !hasReplicaWithOngoingRepair(history, + myId, + repairType, + hostsBeingRepaired, + hostsBeingRepairedToRepairType, + replicationStrategies)); + } + + // 7. Select the first (oldest lastRepairFinishTime) repair history without replicas being repaired + return finishedRepairHistories.findFirst().orElse(null); + } + + + /** + * @return Whether the host for the given eligibleRepairHistory has any replicas in hostsBeingRepaired. + * @param eligibleHistory History of node to check + * @param myId Host id of this node, if the repair history is for this node, additional logging will take place. + * @param myRepairType repair type being evaluated + * @param hostsBeingRepaired Hosts being repaired. + * @param hostIdToRepairType mapping of hosts being repaired to the repair type its being repaired for. + * @param replicationStrategies Mapping of unique replication strategies to keyspaces having that strategy. + */ + private static boolean hasReplicaWithOngoingRepair(AutoRepairHistory eligibleHistory, + UUID myId, + RepairType myRepairType, + Map hostsBeingRepaired, + Map hostIdToRepairType, + Map> replicationStrategies) + { + // If no broadcast address found for this host id in cluster metadata, just skip it, a node should always + // see itself in cluster metadata. + InetAddressAndPort eligibleBroadcastAddress = getBroadcastAddress(eligibleHistory.hostId); + if (eligibleBroadcastAddress == null) + { + return true; + } + + // For each replication strategy, determine if host being repaired is a replica of the local node. 
+ for (Map.Entry> entry : replicationStrategies.entrySet()) + { + AbstractReplicationStrategy replicationStrategy = entry.getKey(); + EndpointsByRange endpointsByRange = replicationStrategy.getRangeAddresses(ClusterMetadata.current()); + + // get ranges of the eligible address for the given replication strategy. + RangesAtEndpoint rangesAtEndpoint = StorageService.instance.getReplicas(replicationStrategy, eligibleBroadcastAddress); + for (Replica replica : rangesAtEndpoint) + { + // get the endpoints involved in this range. + EndpointsForRange endpointsForRange = endpointsByRange.get(replica.range()); + // For each host in this range... + for (InetAddressAndPort inetAddressAndPort : endpointsForRange.endpoints()) + { + // If the address of the node in the range belongs to a host being repaired, return true. + UUID hostId = hostsBeingRepaired.get(inetAddressAndPort); + if (hostId != null) + { + // log if the repair history matches the current running node. + InetAddressAndPort myBroadcastAddress = getBroadcastAddress(myId); + if (myBroadcastAddress != null && myBroadcastAddress.equals(eligibleBroadcastAddress)) + { + logger.info("Deferring repair because replica {} ({}) with shared ranges for " + + "{} keyspace(s) (e.g. {}) is currently taking its turn for schedule {}", + hostId, inetAddressAndPort, entry.getValue().size(), entry.getValue().get(0), + hostIdToRepairType.get(hostId)); + AutoRepairMetricsManager.getMetrics(myRepairType).repairDelayedByReplica.inc(); + } + else if (logger.isDebugEnabled()) + { + logger.debug("Not considering node {} ({}) for repair as it has replica {} ({}) with " + + "shared ranges for {} keyspace(s) (e.g. {}) which is currently taking its " + + "turn for schedule {}", + eligibleHistory.hostId, eligibleBroadcastAddress, + hostId, inetAddressAndPort, entry.getValue().size(), entry.getValue().get(0), + hostIdToRepairType.get(hostId)); + + } + return true; + } + } + } + } + + // No replicas found of eligible host. 
+ return false; + } + private static AutoRepairHistory getHostWithLongestUnrepairTime(List autoRepairHistories) { if (autoRepairHistories == null) @@ -480,6 +756,12 @@ public static int getMaxNumberOfNodeRunAutoRepair(RepairType repairType, int gro return Math.max(1, value); } + private static void logAlreadyMyTurn() + { + logger.warn("This node already was considered to having an ongoing repair for this repair type, must have " + + "been restarted, taking my turn back"); + } + @VisibleForTesting public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) { @@ -493,7 +775,7 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) List autoRepairHistories = getAutoRepairHistory(repairType); Set autoRepairHistoryIds = new HashSet<>(); - // 1. Remove any node that is not part of group based on goissip info + // 1. Remove any node that is not part of group based on gossip info if (autoRepairHistories != null) { for (AutoRepairHistory nodeHistory : autoRepairHistories) @@ -501,7 +783,7 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) autoRepairHistoryIds.add(nodeHistory.hostId); // clear delete_hosts if the node's delete hosts is not growing for more than two hours AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - if (nodeHistory.deleteHosts.size() > 0 + if (!nodeHistory.deleteHosts.isEmpty() && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( currentTimeMillis() - nodeHistory.deleteHostsUpdateTime )) @@ -538,11 +820,14 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - //get current repair status - CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); + // get updated current repair status + CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, getAutoRepairHistory(repairType), myId); if (currentRepairStatus != null) { - 
logger.info("Latest repair status {}", currentRepairStatus); + if (logger.isDebugEnabled()) + { + logger.debug("Latest repair status {}", currentRepairStatus); + } //check if I am forced to run repair for (AutoRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) { @@ -553,6 +838,23 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } + // check if node was already indicated as having an ongoing repair, this may happen when a node restarts + // before finishing repairing. + if (currentRepairStatus != null && currentRepairStatus.getAllHostsWithOngoingRepair().contains(myId)) + { + logAlreadyMyTurn(); + + // use the previously chosen turn. + if (currentRepairStatus.myRepairHistory != null && currentRepairStatus.myRepairHistory.repairTurn != null) + { + return RepairTurn.valueOf(currentRepairStatus.myRepairHistory.repairTurn); + } + else + { + return MY_TURN; + } + } + int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, autoRepairHistories == null ? 
0 : autoRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); @@ -567,16 +869,17 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) { // try to fetch again autoRepairHistories = getAutoRepairHistory(repairType); - currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); - if (autoRepairHistories == null || currentRepairStatus == null) + if (autoRepairHistories == null) { logger.error("No record found"); return NOT_MY_TURN; } + + currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories, myId); } UUID priorityHostId = null; - if (currentRepairStatus != null && currentRepairStatus.priority != null) + if (currentRepairStatus.priority != null) { for (UUID priorityID : currentRepairStatus.priority) { @@ -597,21 +900,37 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) if (priorityHostId != null && !myId.equals(priorityHostId)) { logger.info("Priority list is not empty and I'm not the first node in the list, not my turn." + - "First node in priority list is {}", ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(priorityHostId))); + "First node in priority list is {}", getBroadcastAddress(priorityHostId)); return NOT_MY_TURN; } + if (myId.equals(priorityHostId)) { //I have a priority for repair hence its my turn now return MY_TURN_DUE_TO_PRIORITY; } - // get the longest unrepaired node from the nodes which are not running repair - AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); - //check who is next, which is helpful for debugging - logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); - if (defaultNodeToBeRepaired != null && defaultNodeToBeRepaired.hostId.equals(myId)) + + // Determine if this node is the most eligible host to repair. 
+ AutoRepairHistory nodeToBeRepaired = getMostEligibleHostToRepair(repairType, currentRepairStatus, myId); + if (nodeToBeRepaired != null) { - return MY_TURN; + if (nodeToBeRepaired.hostId.equals(myId)) + { + logger.info("This node is selected to be repaired for repair type {}", repairType); + return MY_TURN; + } + + // log which node is next, which is helpful for debugging + logger.info("Next node to be repaired for repair type {}: {} ({})", repairType, + getBroadcastAddress(nodeToBeRepaired.hostId), + nodeToBeRepaired); + } + + // If this node is not identified as most eligible, set the repair lag time. + if (currentRepairStatus.myRepairHistory != null) + { + AutoRepairMetricsManager.getMetrics(repairType) + .recordRepairStartLag(currentRepairStatus.myRepairHistory.lastRepairFinishTime); } } else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) @@ -656,7 +975,6 @@ static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long ByteBufferUtil.bytes(repairType.toString()), ByteBufferUtil.bytes(myId) )), Dispatcher.RequestTime.forImmediateExecution()); - // Do not remove beblow log, the log is used by dtest logger.info("Auto repair finished for {}", myId); } @@ -756,7 +1074,7 @@ public static Set getPriorityHostIds(RepairType repairType) repairPriorityResult = UntypedResultSet.create(repairPriorityRows.result); Set priorities = null; - if (repairPriorityResult.size() > 0) + if (!repairPriorityResult.isEmpty()) { // there should be only one row UntypedResultSet.Row row = repairPriorityResult.one(); @@ -774,7 +1092,13 @@ public static Set getPriorityHosts(RepairType repairType) Set hosts = new HashSet<>(); for (UUID hostId : getPriorityHostIds(repairType)) { - hosts.add(ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(hostId)).broadcastAddress); + InetAddressAndPort broadcastAddress = getBroadcastAddress(hostId); + if (broadcastAddress == null) + { + logger.warn("Could not resolve broadcastAddress for {}, skipping 
considering it as a priority host", hostId); + continue; + } + hosts.add(broadcastAddress); } return hosts; } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 9f66a92abc95..4d34a53268f1 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -202,6 +202,18 @@ public void setParallelRepairCount(String repairType, int count) config.setParallelRepairCount(RepairType.parse(repairType), count); } + @Override + public void setAllowParallelReplicaRepair(String repairType, boolean enabled) + { + config.setAllowParallelReplicaRepair(RepairType.parse(repairType), enabled); + } + + @Override + public void setAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled) + { + config.setAllowParallelReplicaRepairAcrossSchedules(RepairType.parse(repairType), enabled); + } + @Override public void setMVRepairEnabled(String repairType, boolean enabled) { @@ -223,7 +235,7 @@ public Set getOnGoingRepairHostIds(String repairType) return Collections.emptySet(); } Set hostIds = new HashSet<>(); - AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(RepairType.parse(repairType))); + AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(RepairType.parse(repairType)), null); for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) { hostIds.add(id.toString()); @@ -273,21 +285,23 @@ private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig co appendConfig(sb, "priority_hosts", Joiner.on(',').skipNulls().join(priorityHosts)); } - appendConfig(sb , "min_repair_interval", config.getRepairMinInterval(repairType)); - appendConfig(sb , "repair_by_keyspace", config.getRepairByKeyspace(repairType)); - 
appendConfig(sb , "number_of_repair_threads", config.getRepairThreads(repairType)); - appendConfig(sb , "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); - appendConfig(sb , "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); - appendConfig(sb , "ignore_dcs", config.getIgnoreDCs(repairType)); - appendConfig(sb , "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); - appendConfig(sb , "parallel_repair_count", config.getParallelRepairCount(repairType)); - appendConfig(sb , "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); - appendConfig(sb , "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); - appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); - appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); - appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); - appendConfig(sb , "repair_max_retries", config.getRepairMaxRetries(repairType)); - appendConfig(sb , "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); + appendConfig(sb, "min_repair_interval", config.getRepairMinInterval(repairType)); + appendConfig(sb, "repair_by_keyspace", config.getRepairByKeyspace(repairType)); + appendConfig(sb, "number_of_repair_threads", config.getRepairThreads(repairType)); + appendConfig(sb, "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); + appendConfig(sb, "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); + appendConfig(sb, "ignore_dcs", config.getIgnoreDCs(repairType)); + appendConfig(sb, "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); + appendConfig(sb, "parallel_repair_count", config.getParallelRepairCount(repairType)); + appendConfig(sb, "parallel_repair_percentage", 
config.getParallelRepairPercentage(repairType)); + appendConfig(sb, "allow_parallel_replica_repair", config.getAllowParallelReplicaRepair(repairType)); + appendConfig(sb, "allow_parallel_replica_repair_across_schedules", config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + appendConfig(sb, "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); + appendConfig(sb, "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); + appendConfig(sb, "repair_session_timeout", config.getRepairSessionTimeout(repairType)); + appendConfig(sb, "force_repair_new_node", config.getForceRepairNewNode(repairType)); + appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries(repairType)); + appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); final String splitterClassName = splitterClass.class_name != null ? splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 72d9da7c5a0c..181c6008f533 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -53,6 +53,10 @@ public interface AutoRepairServiceMBean public void setParallelRepairCount(String repairType, int count); + public void setAllowParallelReplicaRepair(String repairType, boolean enabled); + + public void setAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled); + public void setMVRepairEnabled(String repairType, boolean enabled); public boolean isAutoRepairDisabled(); diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index bd9b67bc37b0..8ce1dd94bd4b 100644 --- 
a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -388,8 +388,12 @@ public RangesAtEndpoint getLocalReplicas(String keyspaceName) public RangesAtEndpoint getReplicas(String keyspaceName, InetAddressAndPort endpoint) { - return Keyspace.open(keyspaceName).getReplicationStrategy() - .getAddressReplicas(ClusterMetadata.current(), endpoint); + return getReplicas(Keyspace.open(keyspaceName).getReplicationStrategy(), endpoint); + } + + public RangesAtEndpoint getReplicas(AbstractReplicationStrategy replicationStrategy, InetAddressAndPort endpoint) + { + return replicationStrategy.getAddressReplicas(ClusterMetadata.current(), endpoint); } public List> getLocalRanges(String ks) diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index bf93bd179d4d..478fc0e55a00 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2640,6 +2640,16 @@ public void setAutoRepairParallelRepairCount(String repairType, int count) autoRepairProxy.setParallelRepairCount(repairType, count); } + public void setAutoRepairAllowParallelReplicaRepair(String repairType, boolean enabled) + { + autoRepairProxy.setAllowParallelReplicaRepair(repairType, enabled); + } + + public void setAutoRepairAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled) + { + autoRepairProxy.setAllowParallelReplicaRepairAcrossSchedules(repairType, enabled); + } + public void setAutoRepairPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) { autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 5a9a1411bb59..cc00cabd0633 100644 --- 
a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -47,7 +47,9 @@ public class SetAutoRepairConfig extends NodeToolCmd "[start_scheduler|number_of_repair_threads|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + - "|parallel_repair_count|parallel_repair_percentage|materialized_view_repair_enabled|repair_max_retries" + + "|parallel_repair_count|parallel_repair_percentage" + + "|allow_parallel_replica_repair|allow_parallel_repair_across_schedules" + + "|materialized_view_repair_enabled|repair_max_retries" + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration" + "|repair_by_keyspace|token_range_splitter.]", required = true) @@ -148,6 +150,12 @@ public void execute(NodeProbe probe) case "parallel_repair_percentage": probe.setAutoRepairParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); break; + case "allow_parallel_replica_repair": + probe.setAutoRepairAllowParallelReplicaRepair(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "allow_parallel_replica_repair_across_schedules": + probe.setAutoRepairAllowParallelReplicaRepairAcrossSchedules(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; case "materialized_view_repair_enabled": probe.setAutoRepairMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java new file mode 100644 index 000000000000..a00e713cf298 --- /dev/null +++ 
b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.repair; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.Util; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; + +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertEquals; + +/** + * Distributed tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} scheduler's + * allow_parallel_replica_repair_across_schedules feature. 
+ */ +public class AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest extends TestBaseImpl +{ + private static Cluster cluster; + + @BeforeClass + public static void init() throws IOException + { + // Configure a cluster with preview and incremental repair enabled in a way that preview repair can be + // run on all three nodes concurrently, but incremental repair can only be run when there are no parallel + // repairs. We should detect contention in the incremental repair scheduler but not preview repaired + // scheduler as a result. + cluster = Cluster.build(3) + .withConfig(config -> config + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.PREVIEW_REPAIRED.getConfigName(), + ImmutableMap.of( + // Configure preview repair to run frequently to + // provoke contention with incremental scheduler. + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "3", + "allow_parallel_replica_repair", "true", + "min_repair_interval", "5s"), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "3", + // Don't allow parallel replica repair across + // schedules + "allow_parallel_replica_repair", "false", + "allow_parallel_replica_repair_across_schedules", "false", + "min_repair_interval", "5s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_retry_backoff", "5s") + .set("auto_repair.repair_task_min_duration", "0s") + .set("auto_repair.repair_check_interval", "5s")) + .start(); + + cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); + } + + @AfterClass + public static void tearDown() + { + 
cluster.close(); + } + + @Test + public void testScheduler() + { + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + AutoRepairService.setup(); + AutoRepair.instance.setup(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); + + // validate that the repair ran on all nodes + cluster.forEach(i -> i.runOnInstance(() -> { + // Expect contention on incremental repair across schedules + AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", + greaterThan(0L), + incrementalMetrics.repairDelayedBySchedule::getCount, + 5, + TimeUnit.MINUTES); + + // No repair contention should be observed for preview repaired since allow_parallel_replica_repair was true + AutoRepairMetrics previewMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.PREVIEW_REPAIRED); + assertEquals(0L, previewMetrics.repairDelayedByReplica.getCount()); + assertEquals(0L, previewMetrics.repairDelayedBySchedule.getCount()); + })); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index c7b7448d1a24..e726dc1a17f7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -27,9 +27,13 @@ import com.google.common.collect.ImmutableMap; import org.apache.cassandra.Util; -import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; import org.apache.cassandra.schema.SystemDistributedKeyspace; 
+import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -46,7 +50,7 @@ import static org.junit.Assert.assertEquals; /** - * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + * Distributed tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} scheduler */ public class AutoRepairSchedulerTest extends TestBaseImpl { @@ -61,33 +65,57 @@ public static void init() throws IOException // Create SimpleDateFormat object with the given pattern sdf = new SimpleDateFormat(pattern); sdf.setLenient(false); - cluster = Cluster.build(3).withConfig(config -> config - .set("auto_repair", - ImmutableMap.of( - "repair_type_overrides", - ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s"), - AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s")))) - .set("auto_repair.enabled", "true") - .set("auto_repair.global_settings.repair_by_keyspace", "true") - .set("auto_repair.repair_task_min_duration", "0s") - .set("auto_repair.repair_check_interval", "10s")).start(); + // Configure a 3-node cluster with num_tokens: 4 and auto_repair enabled + cluster = Cluster.build(3) + .withTokenCount(4) + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(3, 4)) + .withConfig(config -> config + .set("num_tokens", 4) + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "2", + // Allow parallel replica repair to allow replicas + // to execute full repair at same time. 
+ "allow_parallel_replica_repair", "true", + "min_repair_interval", "15s"), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + // Set parallel repair count to 3 to provoke + // contention between replicas when scheduling. + "parallel_repair_count", "3", + // Disallow parallel replica repair to prevent + // replicas from issuing incremental repair at + // same time. + "allow_parallel_replica_repair", "false", + // Run more aggressively since full repair is + // less restrictive about when it can run repair, + // so need to check more frequently to allow + // incremental to get an attempt in. + "min_repair_interval", "5s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_by_keyspace", "true") + .set("auto_repair.global_settings.repair_retry_backoff", "5s") + .set("auto_repair.repair_task_min_duration", "0s") + .set("auto_repair.repair_check_interval", "5s")) + .start(); cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); } + @AfterClass + public static void tearDown() + { + cluster.close(); + } + @Test public void testScheduler() throws ParseException { @@ -98,10 +126,7 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { - DatabaseDescriptor.setCDCOnRepairEnabled(false); - DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - AutoRepairService.instance.setup(); - DatabaseDescriptor.setCDCOnRepairEnabled(false); + AutoRepairService.setup(); AutoRepair.instance.setup(); } catch (Exception e) @@ -112,17 +137,39 @@ public void testScheduler() throws ParseException // validate that the repair ran on all nodes cluster.forEach(i -> i.runOnInstance(() -> { - 
Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + // Reduce sleeping if repair finishes quickly to speed up test but make it non-zero to provoke some + // contention. + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("1s"); + + AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, + () -> incrementalMetrics.nodeRepairTimeInSec.getValue().longValue(), 5, TimeUnit.MINUTES); - Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + + // Expect some contention on incremental repair. + Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, + incrementalMetrics.repairDelayedByReplica::getCount, 5, TimeUnit.MINUTES); + // Do not expect any contention across schedules since allow_parallel_replica_repairs across schedules + // was not configured. 
+ assertEquals(0L, incrementalMetrics.repairDelayedBySchedule.getCount()); + + AutoRepairMetrics fullMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.FULL); + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + greaterThan(0L), + () -> fullMetrics.nodeRepairTimeInSec.getValue().longValue(), + 5, + TimeUnit.MINUTES); + + // No repair contention should be observed for full repair since allow_parallel_replica_repair was true + assertEquals(0L, fullMetrics.repairDelayedByReplica.getCount()); + assertEquals(0L, fullMetrics.repairDelayedBySchedule.getCount()); })); + validate(AutoRepairConfig.RepairType.FULL.toString()); validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); } @@ -137,7 +184,7 @@ private void validate(String repairType) throws ParseException // repair_type Assert.assertEquals(repairType, row[0].toString()); // host_id - UUID.fromString(row[1].toString()); + Assert.assertNotNull(UUID.fromString(row[1].toString())); // ensure there is a legit repair_start_ts and repair_finish_ts sdf.parse(row[2].toString()); sdf.parse(row[3].toString()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 2c3e4401ed9f..0cf51ca40884 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -20,7 +20,6 @@ import java.util.HashSet; import java.util.Map; -import java.util.Objects; import java.util.Collections; import java.util.Set; @@ -155,7 +154,7 @@ public void testSetRepairThreads() { config.setRepairThreads(repairType, 5); - assert config.getOptions(repairType).number_of_repair_threads == 5; + assertEquals(5, config.getOptions(repairType).number_of_repair_threads.intValue()); } @Test @@ -183,7 +182,7 @@ public void testSetRepairMinFrequencyInHours() { 
config.setRepairMinInterval(repairType, "5s"); - assert config.getOptions(repairType).min_repair_interval.toSeconds() == 5; + assertEquals(5, config.getOptions(repairType).min_repair_interval.toSeconds()); } @Test @@ -201,7 +200,7 @@ public void testSetAutoRepairHistoryClearDeleteHostsBufferInSec() { config.setAutoRepairHistoryClearDeleteHostsBufferInterval("5s"); - assert Objects.equals(config.history_clear_delete_hosts_buffer_interval, new DurationSpec.IntSecondsBound("5s")); + assertEquals(new DurationSpec.IntSecondsBound("5s"), config.history_clear_delete_hosts_buffer_interval); } @Test @@ -219,7 +218,7 @@ public void testSetRepairSSTableCountHigherThreshold() { config.setRepairSSTableCountHigherThreshold(repairType, 5); - assert config.getOptions(repairType).sstable_upper_threshold == 5; + assertEquals(5, config.getOptions(repairType).sstable_upper_threshold.intValue()); } @Test @@ -237,8 +236,8 @@ public void testSetAutoRepairTableMaxRepairTimeInSec() { config.setAutoRepairTableMaxRepairTime(repairType, "5s"); - assert config.getOptions(repairType).table_max_repair_time.toSeconds() == 5; - } + assertEquals(5, config.getOptions(repairType).table_max_repair_time.toSeconds()); +} @Test public void testGetIgnoreDCs() @@ -291,7 +290,7 @@ public void testSetParallelRepairPercentageInGroup() { config.setParallelRepairPercentage(repairType, 5); - assert config.getOptions(repairType).parallel_repair_percentage == 5; + assertEquals(5, config.getOptions(repairType).parallel_repair_percentage.intValue()); } @Test @@ -309,7 +308,55 @@ public void testSetParallelRepairCountInGroup() { config.setParallelRepairCount(repairType, 5); - assert config.getOptions(repairType).parallel_repair_count == 5; + assertEquals(5, config.getOptions(repairType).parallel_repair_count.intValue()); + } + + @Test + public void testGetAllowParallelReplicaRepair() + { + // should default to false + assertFalse(config.global_settings.allow_parallel_replica_repair); + 
assertFalse(config.getAllowParallelReplicaRepair(repairType)); + + // setting global to true should also cause repair type config to inherit. + config.global_settings.allow_parallel_replica_repair = true; + assertTrue(config.getAllowParallelReplicaRepair(repairType)); + + } + + @Test + public void testSetAllowParallelReplicaRepair() + { + // should default to false + assertFalse(config.getAllowParallelReplicaRepair(repairType)); + + // setting explicitly for repair type should update it + config.setAllowParallelReplicaRepair(repairType, true); + assertTrue(config.getAllowParallelReplicaRepair(repairType)); + } + + @Test + public void testGetAllowParallelReplicaRepairAcrossSchedules() + { + // should default to true + assertTrue(config.global_settings.allow_parallel_replica_repair_across_schedules); + assertTrue(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + // setting global to true should also cause repair type config to inherit. + config.global_settings.allow_parallel_replica_repair_across_schedules = false; + assertFalse(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + } + + @Test + public void testSetAllowParallelReplicaRepairAcrossSchedules() + { + // should default to true + assertTrue(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + // setting explicitly for repair type should update it + config.setAllowParallelReplicaRepairAcrossSchedules(repairType, false); + assertFalse(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); } @Test @@ -396,7 +443,7 @@ public void testSetInitialSchedulerDelay() { config.setInitialSchedulerDelay(repairType, "5s"); - assert config.getOptions(repairType).initial_scheduler_delay.toSeconds() == 5; + assertEquals(5, config.getOptions(repairType).initial_scheduler_delay.toSeconds()); } @Test @@ -414,7 +461,7 @@ public void testSetRepairSessionTimeout() { config.setRepairSessionTimeout(repairType, "1h"); - assert 
config.getOptions(repairType).repair_session_timeout.toSeconds() == 3600; + assertEquals(3600, config.getOptions(repairType).repair_session_timeout.toSeconds()); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java new file mode 100644 index 000000000000..d0b053a58778 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class AutoRepairMetricsTest extends CQLTester +{ + + private AutoRepairMetrics metrics; + + @BeforeClass + public static void setupClass() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); + + // Set min repair interval to an hour. + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(RepairType.FULL, "1h"); + } + + @Before + public void setup() + { + metrics = AutoRepairMetricsManager.getMetrics(RepairType.FULL); + } + + @Test + public void testShouldRecordRepairStartLagAndResetOnMyTurn() + { + // record a last finish repair time of one day. + long oneDayAgo = AutoRepair.instance.currentTimeMs() - 86_400_000; + metrics.recordRepairStartLag(oneDayAgo); + + // expect a recorded lag time of approximately 1 day (last repair finish time) - 1 hour (min repair interval) + long expectedLag = 86400 - 3600; + long recordedLag = metrics.repairStartLagSec.getValue(); + assertTrue(String.format("Expected at last 23h of lag (%d) but got (%d)", expectedLag, recordedLag), + recordedLag >= expectedLag); + // Given timing, allow at most 5 seconds of skew. 
+ assertTrue(String.format("Expected 23h of lag (%d) but got a larger value (%d)", expectedLag, recordedLag), + recordedLag <= expectedLag + 5); + + // expect lag time to be restarted when recording a turn. + metrics.recordTurn(RepairTurn.MY_TURN); + assertEquals(0, metrics.repairStartLagSec.getValue().intValue()); + } + + @Test + public void testShouldRecordRepairStartLagOfZeroWhenFinishTimeIsWithinMinRepairInterval() + { + // record a last finish repair time of one 30 minutes + long thirtyMinutesAgo = AutoRepair.instance.currentTimeMs() - 1_800_000; + metrics.recordRepairStartLag(thirtyMinutesAgo); + + // expect 0 lag because last repair finish time was less than min repair interval + assertEquals(0, metrics.repairStartLagSec.getValue().intValue()); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 1cc80791b4e6..73efae40130e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -276,6 +276,8 @@ public void testRepair() //if repair was done then lastRepairTime should be non-zero Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual value lastRepairTime %d", lastRepairTime), lastRepairTime > 0); + // repair start lag sec should be reset on a successful repair + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).repairStartLagSec.getValue().intValue()); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index d79f0cb6b4c8..b800b213bbb6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -196,7 +196,7 @@ public void 
testGetCurrentRepairStatus() SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), regularRepair)); - CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType); + CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType, AutoRepairUtils.getAutoRepairHistory(repairType), hostId); assertNotNull(status); assertEquals(1, status.historiesWithoutOnGoingRepair.size()); @@ -207,6 +207,7 @@ public void testGetCurrentRepairStatus() assertTrue(status.hostIdsWithOnGoingForceRepair.contains(forceRepair)); assertEquals(1, status.priority.size()); assertTrue(status.priority.contains(regularRepair)); + assertEquals(hostId, status.myRepairHistory.hostId); } @Test From 5197fdc808bb91dd25085b57dfe379d4218f4a17 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 30 Mar 2025 13:25:02 -0700 Subject: [PATCH 187/257] Add toString to AutoRepairConfig.java --- .../repair/autorepair/AutoRepairConfig.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 739a57e9c746..b8d6d105fc2a 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -594,4 +594,16 @@ protected T applyOverrides(RepairType repairType, Function optio // Otherwise check defaults return getOverride(Options.getDefaultOptionsMap().get(repairType), optionSupplier); } + + public String toString() + { + return "AutoRepairConfig{" + + "enabled=" + enabled + + ", repair_check_interval=" + repair_check_interval + + ", history_clear_delete_hosts_buffer_interval=" + history_clear_delete_hosts_buffer_interval + + ", repair_task_min_duration=" + repair_task_min_duration + + ", global_settings=" + global_settings + + ", repair_type_overrides=" + repair_type_overrides + 
+ "}"; + } } From a775b503c93a0d39adc8873bd5310161ed1a55b6 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 6 Apr 2025 14:15:36 -0700 Subject: [PATCH 188/257] Create AutoRepair executors only if enabled and a few other nits --- conf/cassandra.yaml | 2 +- conf/cassandra_latest.yaml | 2 +- .../pages/managing/operating/auto_repair.adoc | 2 +- .../repair/autorepair/AutoRepair.java | 45 ++++++++----------- .../repair/autorepair/AutoRepairConfig.java | 12 +---- .../cassandra/service/AutoRepairService.java | 2 +- .../cassandra/service/StorageService.java | 2 +- .../AutoRepairParameterizedTest.java | 22 +-------- .../repair/autorepair/AutoRepairTest.java | 40 +++++++---------- .../FixedSplitTokenRangeSplitterHelper.java | 8 ++-- 10 files changed, 48 insertions(+), 89 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 00b5e6e38ad1..7dbf1b3fabe2 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2800,7 +2800,7 @@ storage_compatibility_mode: NONE # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. # initial_scheduler_delay: 5m -# # Timeout for resuming stuck repair sessions. +# # Timeout for retrying stuck repair sessions. # repair_session_timeout: 3h # # Force immediate repair on new nodes after they join the ring. # force_repair_new_node: false diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index befaa7ece68e..6dfc89a975b9 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2493,7 +2493,7 @@ storage_compatibility_mode: NONE # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. # initial_scheduler_delay: 5m -# # Timeout for resuming stuck repair sessions. +# # Timeout for retrying stuck repair sessions. 
# repair_session_timeout: 3h # # Force immediate repair on new nodes after they join the ring. # force_repair_new_node: false diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index 06c0db456753..3928d0afccef 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -205,7 +205,7 @@ node. Defaults to true and is only evaluated when allow_parallel_replica_repair | materialized_view_repair_enabled | false | Repairs materialized views if true. | initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. -| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. +| repair_session_timeout | 3h | Timeout for retrying stuck repair sessions. | force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. | sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. | table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. 
If exceeded, the repair diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 51704ce2647b..8c08ce7c80cb 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -40,7 +40,6 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.config.DurationSpec; -import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; import org.slf4j.Logger; @@ -84,12 +83,12 @@ public class AutoRepair public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); @VisibleForTesting - public final Map repairStates; + public Map repairStates; @VisibleForTesting - protected final Map repairExecutors; + protected Map repairExecutors; - protected final Map repairRunnableExecutors; + protected Map repairRunnableExecutors; @VisibleForTesting // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair @@ -99,21 +98,13 @@ public class AutoRepair @VisibleForTesting protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; - private boolean isSetupDone = false; + @VisibleForTesting + public boolean isSetupDone = false; public static AutoRepair instance = new AutoRepair(); - @VisibleForTesting - protected AutoRepair() + private AutoRepair() { - repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); - repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); - repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) - { - repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), 
Thread.NORM_PRIORITY)); - repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); - repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); - } + // Private constructor to prevent instantiation } public void setup() @@ -126,6 +117,16 @@ public void setup() { return; } + repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); + } + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); AutoRepairUtils.setup(); @@ -144,16 +145,6 @@ public void setup() } } - // repairAsync runs a repair session of the given type asynchronously. - public void repairAsync(AutoRepairConfig.RepairType repairType) - { - if (!AutoRepairService.instance.getAutoRepairConfig().isAutoRepairEnabled(repairType)) - { - throw new ConfigurationException("Auto-repair is disabled for repair type " + repairType); - } - repairExecutors.get(repairType).submit(() -> repair(repairType)); - } - /** * @return The current observed system time in ms. 
*/ @@ -554,7 +545,7 @@ public boolean isSuccess() public void progress(String tag, ProgressEvent event) { ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(Clock.Global.currentTimeMillis()), event.getMessage()); + String message = String.format("[%s] %s", format.format(timeFunc.get()), event.getMessage()); if (type == ProgressEventType.ERROR) { logger.error("Repair failure for repair {}: {}", repairType.toString(), message); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index b8d6d105fc2a..9d842888d585 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -320,16 +320,6 @@ public ParameterizedClass getTokenRangeSplitter(RepairType repairType) return applyOverrides(repairType, opt -> opt.token_range_splitter); } - /** - * Set a new token range splitter, this is not meant to be used other than for testing. - */ - @VisibleForTesting - void setTokenRangeSplitter(RepairType repairType, ParameterizedClass tokenRangeSplitter) - { - getOptions(repairType).token_range_splitter = tokenRangeSplitter; - tokenRangeSplitters.remove(repairType); - } - public IAutoRepairTokenRangeSplitter getTokenRangeSplitterInstance(RepairType repairType) { return tokenRangeSplitters.computeIfAbsent(repairType, @@ -530,7 +520,7 @@ protected static Options getDefaultOptions() public volatile ParameterizedClass token_range_splitter; // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; - // Timeout for resuming stuck repair sessions. + // Timeout for retrying stuck repair sessions. 
public volatile DurationSpec.IntSecondsBound repair_session_timeout; // Maximum number of retries for a repair session. public volatile Integer repair_max_retries = 3; diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 4d34a53268f1..db1b29c38968 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -71,7 +71,7 @@ public void checkCanRun(String repairType) public void checkCanRun(RepairType repairType) { if (!config.isAutoRepairSchedulingEnabled()) - throw new ConfigurationException("Auto-repair scheduller is disabled."); + throw new ConfigurationException("Auto-repair scheduler is disabled."); if (repairType != RepairType.INCREMENTAL) return; diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 8ce1dd94bd4b..5d22ba4c058a 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1143,8 +1143,8 @@ public void doAutoRepairSetup() { logger.info("Enabling auto-repair scheduling"); AutoRepair.instance.setup(); + logger.info("AutoRepair setup complete!"); } - logger.info("AutoRepair setup complete!"); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 73efae40130e..8c3127ee3097 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -180,8 +180,8 @@ public void setup() Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); 
Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); - - AutoRepair.instance = new AutoRepair(); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); executeCQL(); timeFuncCalls = 0; @@ -240,24 +240,6 @@ private void executeCQL() .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); } - @Test(expected = ConfigurationException.class) - public void testRepairAsyncWithRepairTypeDisabled() - { - AutoRepairService.instance.getAutoRepairConfig().setAutoRepairEnabled(repairType, false); - - AutoRepair.instance.repairAsync(repairType); - } - - @Test - public void testRepairAsync() - { - AutoRepair.instance.repairExecutors.put(repairType, mockExecutor); - - AutoRepair.instance.repairAsync(repairType); - - verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); - } - @Test public void testRepairTurn() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index caae31c69db1..1eceb386ee25 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -68,14 +68,12 @@ public void setup() @Test public void testSetup() { - AutoRepair instance = new AutoRepair(); - instance.setup(); - - assertEquals(RepairType.values().length, instance.repairExecutors.size()); - for (RepairType repairType : instance.repairExecutors.keySet()) + AutoRepair.instance.setup(); + assertEquals(RepairType.values().length, AutoRepair.instance.repairExecutors.size()); + for (RepairType repairType : AutoRepair.instance.repairExecutors.keySet()) { - int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() - + instance.repairExecutors.get(repairType).getActiveTaskCount(); + int expectedTasks = 
AutoRepair.instance.repairExecutors.get(repairType).getPendingTaskCount() + + AutoRepair.instance.repairExecutors.get(repairType).getActiveTaskCount(); assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), expectedTasks > 0); } @@ -84,18 +82,16 @@ public void testSetup() @Test public void testSafeGuardSetupCall() { - AutoRepair instance = new AutoRepair(); - // only one should be setup, and rest should be ignored - instance.setup(); - instance.setup(); - instance.setup(); + AutoRepair.instance.setup(); + AutoRepair.instance.setup(); + AutoRepair.instance.setup(); - assertEquals(RepairType.values().length, instance.repairExecutors.size()); - for (RepairType repairType : instance.repairExecutors.keySet()) + assertEquals(RepairType.values().length, AutoRepair.instance.repairExecutors.size()); + for (RepairType repairType : AutoRepair.instance.repairExecutors.keySet()) { - int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() - + instance.repairExecutors.get(repairType).getActiveTaskCount(); + int expectedTasks = AutoRepair.instance.repairExecutors.get(repairType).getPendingTaskCount() + + AutoRepair.instance.repairExecutors.get(repairType).getActiveTaskCount(); assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), expectedTasks > 0); } @@ -108,9 +104,8 @@ public void testSetupFailsWhenIREnabledWithCDCReplay() DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); - - AutoRepair instance = new AutoRepair(); - instance.setup(); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); } @Test @@ -120,8 +115,7 @@ public void testNoFailureIfMVRepairOnButConfigIsOff() DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, false); DatabaseDescriptor.setCDCOnRepairEnabled(false); 
DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - AutoRepair instance = new AutoRepair(); - instance.setup(); + AutoRepair.instance.setup(); } @Test(expected = ConfigurationException.class) @@ -131,8 +125,8 @@ public void testSetupFailsWhenIREnabledWithMVReplay() DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, true); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - AutoRepair instance = new AutoRepair(); - instance.setup(); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index cfa0748f1f7f..dac4a167d556 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -37,6 +37,8 @@ import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.compatibility.TokenRingUtils; +import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; @@ -78,7 +80,7 @@ public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRan { int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); assertEquals(numTokens, 
tokens.size()); List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); @@ -123,7 +125,7 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSub { int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); assertEquals(numTokens, tokens.size()); List> expectedToken = new ArrayList<>(); for (Range range : tokens) @@ -153,7 +155,7 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSub public static void testTokenRangesWithDefaultSplit(int numTokens, AutoRepairConfig.RepairType repairType) { int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); assertEquals(numTokens, tokens.size()); List> expectedToken = new ArrayList<>(); for (Range range : tokens) From 17e0d9adef193ceffdacda04912da7293f041cc5 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 6 Apr 2025 20:12:54 -0700 Subject: [PATCH 189/257] parens in next line --- .../autorepair/AutoRepairStateTest.java | 8 ++- .../service/ActiveRepairServiceTest.java | 63 ++++++++++--------- .../nodetool/SSTableRepairedSetTest.java | 29 ++++++--- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index fc4ac2f03c79..422ebdff5192 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -185,7 +185,9 @@ public void testGetLongestUnrepairedSecNull() try { assertEquals(0, state.getLongestUnrepairedSec()); - } catch (Exception e) { + } + catch (Exception e) + { assertNull(e); } } @@ -201,7 +203,9 @@ public void testGetLongestUnrepairedSec() try { assertEquals(1, state.getLongestUnrepairedSec()); - } catch (Exception e) { + } + catch (Exception e) + { assertNull(e); } } diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index 6ccfbcbfa971..455f5194444a 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -1,21 +1,21 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.cassandra.service; import java.net.UnknownHostException; @@ -40,6 +40,7 @@ import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.Condition; + import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -229,12 +230,12 @@ public void testGetNeighborsTimesTwoInSpecifiedHosts() throws Throwable } expected.remove(FBUtilities.getBroadcastAddressAndPort()); - Collection hosts = Arrays.asList(FBUtilities.getBroadcastAddressAndPort().getHostAddressAndPort(),expected.get(0).getHostAddressAndPort()); + Collection hosts = Arrays.asList(FBUtilities.getBroadcastAddressAndPort().getHostAddressAndPort(), expected.get(0).getHostAddressAndPort()); Iterable> ranges = StorageService.instance.getLocalReplicas(KEYSPACE5).ranges(); assertEquals(expected.get(0), ActiveRepairService.instance().getNeighbors(KEYSPACE5, ranges, - ranges.iterator().next(), - null, hosts).endpoints().iterator().next()); + ranges.iterator().next(), + null, hosts).endpoints().iterator().next()); } @Test(expected = IllegalArgumentException.class) @@ -264,7 +265,6 @@ public void testParentRepairStatus() throws Throwable List failed = StorageService.instance.getParentRepairStatus(3); assertNotNull(failed); assertEquals(ActiveRepairService.ParentRepairStatus.FAILED, 
ActiveRepairService.ParentRepairStatus.valueOf(failed.get(0))); - } Set addTokens(int max) throws Throwable @@ -339,10 +339,10 @@ private static RepairOption opts(String... params) { assert params.length % 2 == 0 : "unbalanced key value pairs"; Map opt = new HashMap<>(); - for (int i=0; i<(params.length >> 1); i++) + for (int i = 0; i < (params.length >> 1); i++) { int idx = i << 1; - opt.put(params[idx], params[idx+1]); + opt.put(params[idx], params[idx + 1]); } return RepairOption.parse(opt, DatabaseDescriptor.getPartitioner()); } @@ -362,19 +362,19 @@ public void repairedAt() throws Exception Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true)), false)); // subrange incremental repair Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - RANGES_KEY, "1:2"), false)); + RANGES_KEY, "1:2"), false)); // hosts incremental repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - HOSTS_KEY, "127.0.0.1"), false)); + HOSTS_KEY, "127.0.0.1"), false)); // dc incremental repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - DATACENTERS_KEY, "DC2"), false)); + DATACENTERS_KEY, "DC2"), false)); // forced incremental repair Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - FORCE_REPAIR_KEY, b2s(true)), false)); + FORCE_REPAIR_KEY, b2s(true)), false)); Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - FORCE_REPAIR_KEY, b2s(true)), true)); + FORCE_REPAIR_KEY, b2s(true)), true)); // full repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(false)), false)); @@ -420,7 +420,8 @@ public void 
testRejectWhenPoolFullStrategy() throws InterruptedException // Submission is unblocked Thread.sleep(250); - validationExecutor.submit(() -> {}); + validationExecutor.submit(() -> { + }); } finally { @@ -457,8 +458,8 @@ public void testQueueWhenPoolFullStrategy() throws InterruptedException allSubmitted.await(TASK_SECONDS + 1, TimeUnit.SECONDS); // Give the tasks we expect to execute immediately chance to be scheduled - Util.spinAssertEquals(2 , ((ExecutorPlus) validationExecutor)::getActiveTaskCount, 1); - Util.spinAssertEquals(3 , ((ExecutorPlus) validationExecutor)::getPendingTaskCount, 1); + Util.spinAssertEquals(2, ((ExecutorPlus) validationExecutor)::getActiveTaskCount, 1); + Util.spinAssertEquals(3, ((ExecutorPlus) validationExecutor)::getPendingTaskCount, 1); // verify that we've reached a steady state with 2 threads actively processing and 3 queued tasks Assert.assertEquals(2, ((ExecutorPlus) validationExecutor).getActiveTaskCount()); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java index edd57447ad13..5d23d22253ad 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -50,15 +50,23 @@ public class SSTableRepairedSetTest private SSTableRepairedSet cmd; @Before - public void setUp() { + public void setUp() + { MockitoAnnotations.initMocks(this); - PrintStream noopStream = new PrintStream(new OutputStream() {@Override public void write(int b) {}}); + PrintStream noopStream = new PrintStream(new OutputStream() + { + @Override + public void write(int b) + { + } + }); when(probe.output()).thenReturn(new Output(noopStream, noopStream)); cmd = new SSTableRepairedSet(); } @Test - public void testNoKeyspace() { + public void testNoKeyspace() + { when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", 
"ks2"))); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); when(probe.getAutoRepairTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); @@ -73,7 +81,8 @@ public void testNoKeyspace() { } @Test - public void testBothRepairedAndUnrepaired() { + public void testBothRepairedAndUnrepaired() + { cmd.args = Arrays.asList("keyspace"); cmd.isRepaired = true; cmd.isUnrepaired = true; @@ -82,14 +91,16 @@ public void testBothRepairedAndUnrepaired() { } @Test - public void testNeitherRepairedNorUnrepaired() { + public void testNeitherRepairedNorUnrepaired() + { cmd.args = Arrays.asList("keyspace"); cmd.execute(probe); verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); } @Test - public void testRepairedPreview() { + public void testRepairedPreview() + { cmd.args = Arrays.asList("keyspace"); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isRepaired = true; @@ -98,7 +109,8 @@ public void testRepairedPreview() { } @Test - public void testUnrepairedReallySet() { + public void testUnrepairedReallySet() + { cmd.args = Arrays.asList("keyspace"); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isUnrepaired = true; @@ -108,7 +120,8 @@ public void testUnrepairedReallySet() { } @Test - public void testExecuteWithTableNames() { + public void testExecuteWithTableNames() + { cmd.args = Arrays.asList("keyspace", "table1", "table2"); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isRepaired = true; From 85a0473ecde65244cbacea50b9e0418bbef23547 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 28 Jan 2025 18:32:14 -0800 Subject: [PATCH 190/257] Adjust based on the latest trunk --- .../repair/autorepair/AutoRepairState.java | 4 +--- .../repair/autorepair/AutoRepairUtils.java | 2 +- .../autorepair/RepairTokenRangeSplitter.java | 14 ++++++++------ 
.../config/YamlConfigurationLoaderTest.java | 2 +- .../repair/autorepair/AutoRepairUtilsTest.java | 15 --------------- .../repair/autorepair/SSTableRepairedAtTest.java | 1 - 6 files changed, 11 insertions(+), 27 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index e5e848c9d50b..58140bba33b6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -311,8 +311,7 @@ public PreviewRepairedState() public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, - AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, - !ranges.isEmpty(), false, false, PreviewKind.REPAIRED, false, true, false, false); + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.REPAIRED, false, true, false, false, false, false); option.getColumnFamilies().addAll(tables); @@ -379,7 +378,6 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.NONE, true, true, false, false, false, false); - option.getColumnFamilies().addAll(tables); return getRepairRunnable(keyspace, option); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index a35542186561..e35f491366ed 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -797,7 +797,7 @@ public static boolean shouldConsiderKeyspace(Keyspace ks) if 
(replicationStrategy instanceof NetworkTopologyStrategy) { Set datacenters = ((NetworkTopologyStrategy) replicationStrategy).getDatacenters(); - String localDC = DatabaseDescriptor.getLocator().location(FBUtilities.getBroadcastAddressAndPort()).datacenter; + String localDC = DatabaseDescriptor.getLocator().local().datacenter; if (!datacenters.contains(localDC)) { repair = false; diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 05fda709eabf..7f12020f3020 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -37,6 +37,11 @@ import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.db.DataRange; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.io.sstable.SSTableReadsListener; +import org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.utils.FBUtilities; import org.slf4j.Logger; @@ -54,7 +59,6 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigTableScanner; import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; @@ -692,12 +696,11 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai // get the bounds of the sstable for this range using the index file but do not actually read it. 
List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - ISSTableScanner rangeScanner = reader.getScanner(Collections.singleton(tokenRange)); - // Type check scanner returned as it may be an EmptySSTableScanner if the range is not covered in the + // Type check bounds to check if the range is not covered in the // SSTable, in this case we will avoid incrementing approxBytesInRange. - if (rangeScanner instanceof BigTableScanner) + if (!bounds.isEmpty() && reader instanceof BigTableReader) { - try (BigTableScanner scanner = (BigTableScanner) rangeScanner) + try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, ColumnFilter.all(reader.metadata()), DataRange.forTokenRange(tokenRange), SSTableReadsListener.NOOP_LISTENER)) { assert bounds.size() == 1; @@ -714,7 +717,6 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); } } - } catch (IOException | CardinalityMergeException e) { diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index b2708b08b81a..3b7bf529ed31 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -565,4 +565,4 @@ public static Config load(String path) } return new YamlConfigurationLoader().loadConfig(url); } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index d80b26a7fc31..1f64638bd734 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -34,7 +34,6 @@ import 
org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.marshal.UUIDType; -import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Locator; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; @@ -48,8 +47,6 @@ import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.utils.FBUtilities; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; @@ -64,7 +61,6 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.when; public class AutoRepairUtilsTest extends CQLTester { @@ -73,18 +69,12 @@ public class AutoRepairUtilsTest extends CQLTester static InetAddressAndPort localEndpoint; - @Mock - static Locator snitchMock; - - static Locator defaultSnitch; - @BeforeClass public static void setupClass() throws Exception { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); setAutoRepairEnabled(true); requireNetwork(); - defaultSnitch = DatabaseDescriptor.getLocator(); localEndpoint = FBUtilities.getBroadcastAddressAndPort(); hostId = StorageService.instance.getHostIdForEndpoint(localEndpoint); StorageService.instance.doAutoRepairSetup(); @@ -98,7 +88,6 @@ public void setup() QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", "ks", "tbl")); AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); - MockitoAnnotations.initMocks(this); QueryProcessor.executeInternal(String.format( "TRUNCATE %s.%s", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, 
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); @@ -234,10 +223,6 @@ public void testGetHostIdsInCurrentRing_multiple_nodes() InetAddressAndPort ignoredEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); InetAddressAndPort deadEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 2); DatabaseDescriptor.getAutoRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); - //DatabaseDescriptor.setEndpointSnitch(snitchMock); - when(snitchMock.location(localEndpoint)).thenReturn(new Location("dc1", "rac1")); - when(snitchMock.location(ignoredEndpoint)).thenReturn(new Location("dc2", "rac1")); - when(snitchMock.location(deadEndpoint)).thenReturn(new Location("dc1", "rac1")); TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new NodeAddresses(deadEndpoint))); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index f1de19b6723d..8c0588cb1244 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -34,7 +34,6 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.StorageService; From cd42a84372cd23db06551035fd99218be517f9d7 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 3 Feb 2025 18:08:09 -0800 Subject: [PATCH 191/257] Remove commented code; use TokenRingUtils.getPrimaryRangesForEndpoint instead of StorageService.instance.getPrimaryRanges --- .../apache/cassandra/repair/autorepair/AutoRepairUtils.java | 1 - 
.../repair/autorepair/FixedSplitTokenRangeSplitter.java | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index e35f491366ed..f3f33e2c7a14 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -730,7 +730,6 @@ public static void addPriorityHosts(RepairType repairType, Set tableNames = repairPlan.getTableNames(); - Collection> tokens = StorageService.instance.getPrimaryRanges(keyspaceName); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(keyspaceName, FBUtilities.getBroadcastAddressAndPort()); if (!primaryRangeOnly) { // if we need to repair non-primary token ranges, then change the tokens accordingly From cf57ad6d1620b2cd0219f416896115d74345c7f6 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 18:58:55 -0600 Subject: [PATCH 192/257] Update RepairTokenRangeSplitter to work with BTI-formatted SSTables While doing a review pass on the CEP-37 PR I realized that RepairTokenRangeSplitter could not possibly work with the new BTI sstable format introduced in 5.0 because it explicitly checks for BigTableReader implementations. In addition, the way it calculates the amount of bytes a range covers in an SSTable is big-format specific. I noticed that a new utility method SSTableReader.onDiskSizeForPartitionPositions was added recently in CASSANDRA-20092 that can effectively calculate this for us in an implementation-agnostic way. This change updates the code to use this method, and also remove any changes we previously made in SSTableScanner and BigTableScanner for this. 
Patch by Andy Tolbert; Reviewed by ___ for CASSANDRA-20315 --- .../io/sstable/format/SSTableScanner.java | 10 ---- .../sstable/format/big/BigTableScanner.java | 45 ++---------------- .../autorepair/RepairTokenRangeSplitter.java | 46 ++++++------------- .../RepairTokenRangeSplitterTest.java | 30 +++++++----- 4 files changed, 37 insertions(+), 94 deletions(-) diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java index 001152d66d11..28035a85da0b 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -36,7 +35,6 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.AbstractBounds.Boundary; import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.AbstractRowIndexEntry; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.ISSTableScanner; @@ -85,14 +83,6 @@ protected SSTableScanner(S sstable, this.listener = listener; } - public static List> makeBounds(SSTableReader sstable, Collection> tokenRanges) - { - List> boundsList = new ArrayList<>(tokenRanges.size()); - for (Range range : Range.normalize(tokenRanges)) - addRange(sstable, Range.makeRowRange(range), boundsList); - return boundsList; - } - protected static List> makeBounds(SSTableReader sstable, DataRange dataRange) { List> boundsList = new ArrayList<>(2); diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java index b1e632e5b44c..83243529c4c9 100644 --- 
a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java @@ -20,8 +20,6 @@ import java.io.IOException; import java.util.Iterator; -import javax.annotation.Nullable; - import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; @@ -66,12 +64,9 @@ private BigTableScanner(BigTableReader sstable, this.rowIndexEntrySerializer = new RowIndexEntry.Serializer(sstable.descriptor.version, sstable.header, sstable.owner().map(SSTable.Owner::getMetrics).orElse(null)); } - // Helper method to seek to the index for the given position or range and optionally in the data file - private long seekAndProcess(@Nullable PartitionPosition position, - @Nullable AbstractBounds range, - boolean seekDataFile) throws CorruptSSTableException + private void seekToCurrentRangeStart() { - long indexPosition = sstable.getIndexScanPosition(position); + long indexPosition = sstable.getIndexScanPosition(currentRange.left); ifile.seek(indexPosition); try { @@ -80,21 +75,13 @@ private long seekAndProcess(@Nullable PartitionPosition position, { indexPosition = ifile.getFilePointer(); DecoratedKey indexDecoratedKey = sstable.decorateKey(ByteBufferUtil.readWithShortLength(ifile)); - - // Whether the position or range is present in the SSTable. - boolean isFound = (range == null && indexDecoratedKey.compareTo(position) > 0) - || (range != null && (indexDecoratedKey.compareTo(range.left) > 0 || range.contains(indexDecoratedKey))); - if (isFound) + if (indexDecoratedKey.compareTo(currentRange.left) > 0 || currentRange.contains(indexDecoratedKey)) { // Found, just read the dataPosition and seek into index and data files long dataPosition = RowIndexEntry.Serializer.readPosition(ifile); - // seek the index file position as we will presumably seek further when going to the next position. 
ifile.seek(indexPosition); - if (seekDataFile) - { - dfile.seek(dataPosition); - } - return dataPosition; + dfile.seek(dataPosition); + break; } else { @@ -107,28 +94,6 @@ private long seekAndProcess(@Nullable PartitionPosition position, sstable.markSuspect(); throw new CorruptSSTableException(e, sstable.getFilename()); } - // If for whatever reason we don't find position in file, just return 0 - return 0L; - } - - /** - * Seeks to the start of the current range and updates both the index and data file positions. - */ - private void seekToCurrentRangeStart() throws CorruptSSTableException - { - seekAndProcess(currentRange.left, currentRange, true); - } - - /** - * Gets the position in the data file, but does not seek to it. This does seek the index to find the data position - * but does not actually seek the data file. - * @param position position to find in data file. - * @return offset in data file where position exists. - * @throws CorruptSSTableException if SSTable was malformed - */ - public long getDataPosition(PartitionPosition position) throws CorruptSSTableException - { - return seekAndProcess(position, null, false); } protected void doClose() throws IOException diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 7f12020f3020..82a63caea1bc 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -38,10 +38,6 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.db.DataRange; -import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.io.sstable.SSTableReadsListener; -import org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.utils.FBUtilities; import 
org.slf4j.Logger; @@ -56,11 +52,9 @@ import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; import org.apache.cassandra.db.lifecycle.SSTableSet; import org.apache.cassandra.db.lifecycle.View; -import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; -import org.apache.cassandra.io.sstable.format.big.BigTableScanner; import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.io.util.FileUtils; @@ -210,7 +204,8 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of * data is written to a node per min_repair_interval. */ - private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ + private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) + {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. @@ -359,7 +354,8 @@ else if (tableAssignments.size() == 1 && long currentAssignmentsBytes = getEstimatedBytes(currentAssignments); long tableAssignmentsBytes = getEstimatedBytes(tableAssignments); // only add assignments together if they don't exceed max bytes per schedule. 
- if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule.toBytes()) { + if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule.toBytes()) + { currentAssignments.addAll(tableAssignments); } else @@ -677,6 +673,7 @@ private List getRangeSizeEstimate(String keyspace, String table, C @VisibleForTesting static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repairType, String keyspace, String table, Range tokenRange, Refs refs) { + List> singletonRange = Collections.singletonList(tokenRange); ICardinality cardinality = new HyperLogLogPlus(13, 25); long approxBytesInRange = 0L; long totalBytes = 0L; @@ -691,32 +688,14 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai if (metadata != null) cardinality = cardinality.merge(metadata.cardinalityEstimator); - long sstableSize = reader.bytesOnDisk(); + // use onDiskLength, which is the actual size of the SSTable data file. + long sstableSize = reader.onDiskLength(); totalBytes += sstableSize; - // get the bounds of the sstable for this range using the index file but do not actually read it. - List> bounds = BigTableScanner.makeBounds(reader, Collections.singleton(tokenRange)); - // Type check bounds to check if the range is not covered in the - // SSTable, in this case we will avoid incrementing approxBytesInRange. 
- if (!bounds.isEmpty() && reader instanceof BigTableReader) - { - try (BigTableScanner scanner = (BigTableScanner) BigTableScanner.getScanner((BigTableReader) reader, ColumnFilter.all(reader.metadata()), DataRange.forTokenRange(tokenRange), SSTableReadsListener.NOOP_LISTENER)) - { - assert bounds.size() == 1; - - AbstractBounds bound = bounds.get(0); - long startPosition = scanner.getDataPosition(bound.left); - long endPosition = scanner.getDataPosition(bound.right); - // If end position is 0 we can assume the sstable ended before that token, bound at size of file - if (endPosition == 0) - { - endPosition = sstableSize; - } - - long approximateRangeBytesInSSTable = Math.max(0, endPosition - startPosition); - approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); - } - } + // get the on disk size for the token range, note for compressed data this includes the full + // chunks the start and end ranges are found in. + long approximateRangeBytesInSSTable = reader.onDiskSizeForPartitionPositions(reader.getPositionsForRanges(singletonRange)); + approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); } catch (IOException | CardinalityMergeException e) { @@ -881,7 +860,8 @@ public SizedRepairAssignment(Range tokenRange, String keyspaceName, List< /** * @return Additional metadata about the repair assignment. 
*/ - public String getDescription() { + public String getDescription() + { return description; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index daeb6466fe2e..329973b81900 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -81,16 +81,19 @@ public void setUp() public void testSizePartitionCount() throws Throwable { insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE); - assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); - SizeEstimate sizes = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); - assertEquals(10, sizes.partitions); + try (Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE)) + { + assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); + SizeEstimate sizes = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); + assertEquals(10, sizes.partitions); + } } @Test public void testSizePartitionCountSplit() throws Throwable { - int[] values = new int[10000]; + int partitionCount = 100_000; + int[] values = new int[partitionCount]; for (int i = 0; i < values.length; i++) values[i] = i + 1; insertAndFlushTable(tableName, values); @@ -99,12 +102,17 @@ public void testSizePartitionCountSplit() throws Throwable Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); - Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); - Refs sstables2 = 
RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2); - SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); - SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); - // +-5% because HLL merge and the applying of range size approx ratio causes estimation errors - assertTrue(Math.abs(10000 - (sizes1.partitions + sizes2.partitions)) <= 60); + try(Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) + { + SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); + SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); + + // +-5% because including entire compression blocks covering token range, HLL merge and the applying of range size approx ratio causes estimation errors + long allowableDelta = (long) (partitionCount * .05); + long estimatedPartitionDelta = Math.abs(partitionCount - (sizes1.partitions + sizes2.partitions)); + assertTrue("Partition count delta was +/-" + estimatedPartitionDelta + " but expected +/- " + allowableDelta, estimatedPartitionDelta <= allowableDelta); + } } @Test From 8deeaeac1f76f2ae15aef61fa2689391d30f15fa Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:04:19 -0600 Subject: [PATCH 193/257] Avoid possible division by zero --- .../repair/autorepair/RepairTokenRangeSplitter.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java 
b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 82a63caea1bc..b6dcfc61b4ae 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -702,9 +702,14 @@ static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repai logger.error("Error calculating size estimate for {}.{} for range {} on {}", keyspace, table, tokenRange, reader, e); } } - double ratio = approxBytesInRange / (double) totalBytes; - // use the ratio from size to estimate the partitions in the range as well - long partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); + + long partitions = 0L; + if (totalBytes > 0) + { + // use the ratio from size to estimate the partitions in the range as well + double ratio = approxBytesInRange / (double) totalBytes; + partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); + } return new SizeEstimate(repairType, keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); } From 1d16ab1d05156243bd2e88acc1791c66bb541997 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:18:41 -0600 Subject: [PATCH 194/257] Test with Bti and Big format sstables --- .../autorepair/RepairTokenRangeSplitterTest.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 329973b81900..065b948c481d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -31,6 +31,8 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import 
org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.apache.cassandra.config.DataStorageSpec.LongMebibytesBound; import org.apache.cassandra.config.DatabaseDescriptor; @@ -41,6 +43,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.io.sstable.format.bti.BtiFormat; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.FilteredRepairAssignments; import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.SizeEstimate; @@ -54,27 +57,36 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +@RunWith(Parameterized.class) public class RepairTokenRangeSplitterTest extends CQLTester { private RepairTokenRangeSplitter repairRangeSplitter; private String tableName; private static Range FULL_RANGE; + @Parameterized.Parameter() + public String sstableFormat; + + @Parameterized.Parameters(name = "sstableFormat={0}") + public static Collection sstableFormats() + { + return List.of(BtiFormat.NAME, BigFormat.NAME); + } + @BeforeClass public static void setUpClass() { CQLTester.setUpClass(); AutoRepairService.setup(); FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); - DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(BigFormat.NAME)); } @Before public void setUp() { + DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); - assertTrue(BigFormat.isSelected()); } @Test From 6108532c030f56a8857720d8ffb165e000107f62 Mon Sep 17 00:00:00 2001 From: Andy 
Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:58:51 -0600 Subject: [PATCH 195/257] Assert correct format selected --- .../repair/autorepair/RepairTokenRangeSplitterTest.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 065b948c481d..d1f598682326 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -87,6 +87,15 @@ public void setUp() DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + // ensure correct format is selected. + if (sstableFormat.equalsIgnoreCase(BigFormat.NAME)) + { + assertTrue(BigFormat.isSelected()); + } + else + { + assertTrue(BtiFormat.isSelected()); + } } @Test From 2d48bfd0ccf0d25202761bcee773c1b0cbcf6553 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 10 Feb 2025 15:59:30 -0800 Subject: [PATCH 196/257] Fix review comments from Andy --- conf/cassandra.yaml | 5 ++ conf/cassandra_latest.yaml | 12 ++++ .../org/apache/cassandra/config/Config.java | 2 +- .../apache/cassandra/config/DurationSpec.java | 2 +- .../cassandra/config/ParameterizedClass.java | 2 +- .../db/streaming/CassandraStreamReceiver.java | 10 ++-- .../FixedSplitTokenRangeSplitter.java | 3 +- .../service/ActiveRepairService.java | 3 + .../service/ActiveRepairServiceMBean.java | 4 ++ .../cassandra/service/StorageService.java | 2 +- .../tcm/sequences/BootstrapAndJoin.java | 5 +- .../tcm/sequences/BootstrapAndReplace.java | 5 +- .../tcm/sequences/ReplaceSameAddress.java | 5 +- 
.../apache/cassandra/utils/FBUtilities.java | 2 +- .../config/YamlConfigurationLoaderTest.java | 2 +- .../service/AutoRepairServiceSetterTest.java | 60 +++++++++++-------- 16 files changed, 81 insertions(+), 43 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 5b6e0dc51fc5..acb017ad8765 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,6 +2675,11 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s +# At least 20% of the disk must be unused to run incremental repair. +# Disabling this check is not recommended, but if you need to disable it for whatever reason, +# set the ratio to 0.0. +# incremental_repair_disk_headroom_reject_ratio: 0.2 + # Configuration for AutoRepair Scheduler. # auto_repair: # # Enable/Disable the auto-repair scheduler. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index ed99e9eece19..cfc03943d515 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2047,6 +2047,13 @@ report_unconfirmed_repaired_data_mismatches: false # Materialized views are considered experimental and are not recommended for production use. materialized_views_enabled: false +# Specify whether Materialized View mutations are replayed through the write path on streaming, e.g. repair. +# When enabled, Materialized View data streamed to the destination node will be written into the commit log first. When set to false, +# the streamed Materialized View data is written into SSTables just the same as normal streaming. The default is true. +# If this is set to false, streaming will be considerably faster; however, it's possible that, in extreme situations +# (losing > quorum # nodes in a replica set), you may have data in your SSTables that never makes it to the Materialized View. +# materialized_views_on_repair_enabled: true + # Enables SASI index creation on this node.
# SASI indexes are considered experimental and are not recommended for production use. sasi_indexes_enabled: false @@ -2363,6 +2370,11 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE +# At least 20% of the disk must be unused to run incremental repair. +# Disabling this check is not recommended, but if you need to disable it for whatever reason, +# set the ratio to 0.0. +# incremental_repair_disk_headroom_reject_ratio: 0.2 + # Configuration for AutoRepair Scheduler. # auto_repair: # # Enable/Disable the auto-repair scheduler. diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 9a0c45622167..1156675799d4 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -33,7 +33,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +43,7 @@ import org.apache.cassandra.index.internal.CassandraIndex; import org.apache.cassandra.io.compress.BufferType; import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.StartupChecks.StartupCheckType; import org.apache.cassandra.utils.StorageCompatibilityMode; diff --git a/src/java/org/apache/cassandra/config/DurationSpec.java b/src/java/org/apache/cassandra/config/DurationSpec.java index ee35de58e81b..969ba255ea6d 100644 --- a/src/java/org/apache/cassandra/config/DurationSpec.java +++ b/src/java/org/apache/cassandra/config/DurationSpec.java @@ -43,7 +43,7 @@ * users the opportunity to be able to provide config with a unit of their choice in cassandra.yaml as per the available * options.
(CASSANDRA-15234) */ -public abstract class DurationSpec implements Serializable +public abstract class DurationSpec { /** * The Regexp used to parse the duration provided as String. diff --git a/src/java/org/apache/cassandra/config/ParameterizedClass.java b/src/java/org/apache/cassandra/config/ParameterizedClass.java index a38dbc1d2f0b..24f7307057e3 100644 --- a/src/java/org/apache/cassandra/config/ParameterizedClass.java +++ b/src/java/org/apache/cassandra/config/ParameterizedClass.java @@ -34,7 +34,7 @@ import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; @Shared(scope = SIMULATION) -public class ParameterizedClass implements Serializable +public class ParameterizedClass { public static final String CLASS_NAME = "class_name"; public static final String PARAMETERS = "parameters"; diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 2c31b175679d..2ce86fcb715a 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -26,20 +26,18 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; -import org.apache.cassandra.io.sstable.SSTable; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.primitives.Ranges; +import io.github.jbellis.jvector.annotations.VisibleForTesting; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; import 
org.apache.cassandra.db.lifecycle.LifecycleTransaction; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.ThrottledUnfilteredIterator; @@ -49,6 +47,7 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; +import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableMultiWriter; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.service.accord.AccordService; @@ -208,7 +207,8 @@ private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) * For CDC-enabled tables and write path for CDC is enabled, we want to ensure that the mutations are * run through the CommitLog, so they can be archived by the CDC process on discard. */ - public boolean requiresWritePath(ColumnFamilyStore cfs) + @VisibleForTesting + boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) || cfs.streamToMemtable() diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 7596272e78fb..f4d09aa18d82 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -72,7 +72,8 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< @Override public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) { - return new RepairAssignmentIterator(repairPlans) { + return new RepairAssignmentIterator(repairPlans) + { @Override protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index e39af4339ac5..22d78794bcee 100644 
--- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -51,6 +51,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DurationSpec; diff --git a/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java b/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java index 851dc6c802bb..c739b048d68f 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java @@ -74,4 +74,8 @@ public interface ActiveRepairServiceMBean int parentRepairSessionsCount(); public int getPaxosRepairParallelism(); public void setPaxosRepairParallelism(int v); + + public double getIncrementalRepairDiskHeadroomRejectRatio(); + + public void setIncrementalRepairDiskHeadroomRejectRatio(double value); } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 741c2bed7fb3..bd9b67bc37b0 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1137,7 +1137,7 @@ public void doAutoRepairSetup() AutoRepairService.setup(); if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { - logger.info("Enable auto-repair scheduling"); + logger.info("Enabling auto-repair scheduling"); AutoRepair.instance.setup(); } logger.info("AutoRepair setup complete!"); diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 19d6cb50edae..0e32f18c2507 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ 
b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -221,8 +221,6 @@ public SequenceState executeNext() "For more, see `nodetool help bootstrap`. {}", SystemKeyspace.getBootstrapState()); return halted(); } - // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else @@ -246,6 +244,9 @@ public SequenceState executeNext() .filter(cfs -> Schema.instance.getUserKeyspaces().names().contains(cfs.keyspace.getName())) .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); ClusterMetadataService.instance().commit(midJoin); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } else { diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java index f3a3a8ff4577..8bc73d142eb9 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java @@ -218,8 +218,6 @@ public SequenceState executeNext() "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); return halted(); } - // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.COMPLETED); } else @@ -243,6 +241,9 @@ public SequenceState executeNext() .filter(cfs -> Schema.instance.getUserKeyspaces().names().contains(cfs.keyspace.getName())) .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); ClusterMetadataService.instance().commit(midReplace); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } else { diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index 1c55d84ca244..4580d716a1ba 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -86,8 +86,6 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s "For more, see `nodetool help bootstrap`. 
{}", SystemKeyspace.getBootstrapState()); throw new IllegalStateException("Could not finish join for during replacement"); } - // this node might have just bootstrapped; check if we should run repair immediately - AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } if (finishJoiningRing) @@ -98,6 +96,9 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); BootstrapAndReplace.gossipStateToNormal(metadata, metadata.myNodeId()); Gossiper.instance.mergeNodeToGossip(metadata.myNodeId(), metadata); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } } } diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 9d55b4ca8280..ed44b84164f1 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -66,7 +66,6 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; -import org.apache.cassandra.io.util.File; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,6 +88,7 @@ import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputBufferFixed; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.security.AbstractCryptoProvider; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 3b7bf529ed31..10075671e589 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ 
b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -39,6 +39,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.io.util.File; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.yaml.snakeyaml.error.YAMLException; import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_ALLOW_SYSTEM_PROPERTIES; @@ -46,7 +47,6 @@ import static org.apache.cassandra.config.YamlConfigurationLoader.SYSTEM_PROPERTY_PREFIX; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index 2f00b38d8190..d3a99590de07 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service; import com.google.common.collect.ImmutableSet; + import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; @@ -28,6 +29,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; + import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -47,7 +49,8 @@ import static org.assertj.core.api.Assertions.assertThat; @RunWith(Parameterized.class) -public class AutoRepairServiceSetterTest extends CQLTester { +public class AutoRepairServiceSetterTest extends CQLTester +{ private static final 
AutoRepairConfig config = new AutoRepairConfig(true); @Parameterized.Parameter @@ -63,45 +66,51 @@ public class AutoRepairServiceSetterTest extends CQLTester { public Function getter; @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") - public static Collection testCases() { + public static Collection testCases() + { DatabaseDescriptor.setConfig(DatabaseDescriptor.loadConfig()); return Stream.of( - forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), - forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), - forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), - forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), - forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), - forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), - forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), - forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), - forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setRepairPriorityForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairUtils::getPriorityHosts), - forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setForceRepairForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairServiceSetterTest::isLocalHostForceRepair) + forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), + forEachRepairType(100, 
AutoRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setRepairPriorityForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairUtils::getPriorityHosts), + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setForceRepairForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairServiceSetterTest::isLocalHostForceRepair) ).flatMap(Function.identity()).collect(Collectors.toList()); } - private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) { + private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) + { UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( - "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + 
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); - if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) { + if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) + { return ImmutableSet.of(InetAddressAndPort.getLocalHost()); } return ImmutableSet.of(); } - private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) { + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) + { Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) { - testCases[repairType.ordinal()] = new Object[]{repairType, arg, setter, getter}; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + testCases[repairType.ordinal()] = new Object[]{ repairType, arg, setter, getter }; } return Arrays.stream(testCases); } @BeforeClass - public static void setup() throws Exception { + public static void setup() throws Exception + { DatabaseDescriptor.daemonInitialization(); setAutoRepairEnabled(true); requireNetwork(); @@ -112,13 +121,14 @@ public static void setup() throws Exception { } @Before - public void prepare() { + public void prepare() + { QueryProcessor.executeInternal(String.format( - "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); QueryProcessor.executeInternal(String.format( - "TRUNCATE %s.%s", - SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); } @Test From 5436ffbe9a24e6b5832fb2cee3bbb461a13c2266 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 10 Feb 2025 
20:46:18 -0800 Subject: [PATCH 197/257] Fix review comments from Jaydeepkumar --- conf/cassandra.yaml | 3 +- conf/cassandra_latest.yaml | 3 +- .../db/streaming/CassandraStreamReceiver.java | 2 +- .../cassandra/metrics/AutoRepairMetrics.java | 1 - .../metrics/AutoRepairMetricsManager.java | 3 ++ .../repair/autorepair/AutoRepair.java | 3 ++ .../repair/autorepair/AutoRepairConfig.java | 3 ++ .../repair/autorepair/AutoRepairState.java | 4 ++- .../FixedSplitTokenRangeSplitter.java | 3 ++ .../cassandra/schema/AutoRepairParams.java | 3 ++ .../apache/cassandra/schema/TableParams.java | 3 +- .../cassandra/service/AutoRepairService.java | 3 ++ .../service/AutoRepairServiceMBean.java | 3 ++ .../org/apache/cassandra/tools/NodeProbe.java | 26 ++++++++-------- .../tools/nodetool/AutoRepairStatus.java | 5 ++- .../tools/nodetool/GetAutoRepairConfig.java | 3 ++ .../tools/nodetool/SSTableRepairedSet.java | 5 ++- .../tools/nodetool/SetAutoRepairConfig.java | 25 ++++++++------- .../test/repair/AutoRepairSchedulerTest.java | 3 ++ test/unit/org/apache/cassandra/Util.java | 2 +- .../CassandraStreamReceiverTest.java | 3 ++ .../AutoRepairConfigRepairTypeTest.java | 1 - .../autorepair/AutoRepairConfigTest.java | 3 ++ .../autorepair/AutoRepairKeyspaceTest.java | 3 ++ .../AutoRepairParameterizedTest.java | 3 ++ .../AutoRepairStateFactoryTest.java | 3 ++ .../autorepair/AutoRepairStateTest.java | 3 ++ .../repair/autorepair/AutoRepairTest.java | 3 ++ .../autorepair/AutoRepairUtilsTest.java | 3 ++ .../FixedSplitTokenRangeSplitterTest.java | 3 ++ .../autorepair/PrioritizedRepairPlanTest.java | 3 ++ .../RepairTokenRangeSplitterTest.java | 3 ++ .../autorepair/SSTableRepairedAtTest.java | 3 ++ .../service/AutoRepairServiceBasicTest.java | 3 ++ .../AutoRepairServiceRepairTypeTest.java | 3 ++ .../service/AutoRepairServiceSetterTest.java | 3 ++ .../tools/nodetool/AutoRepairStatusTest.java | 5 ++- .../nodetool/SSTableRepairedSetTest.java | 7 +++-- .../nodetool/SetAutoRepairConfigTest.java | 31 
++++++++++--------- 39 files changed, 143 insertions(+), 52 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index acb017ad8765..39e0ab776964 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,7 +2675,8 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s -# at least 20% of disk must be unused to run incremental repair +# At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during +# incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. # if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 # incremental_repair_disk_headroom_reject_ratio = 0.2; diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index cfc03943d515..da44d4f7b999 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2370,7 +2370,8 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE -# at least 20% of disk must be unused to run incremental repair +# At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during +# incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. 
# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 # incremental_repair_disk_headroom_reject_ratio = 0.2; diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 2ce86fcb715a..61f64ce3d0af 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Set; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; @@ -30,7 +31,6 @@ import org.slf4j.LoggerFactory; import accord.primitives.Ranges; -import io.github.jbellis.jvector.annotations.VisibleForTesting; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index b097dd3414c4..0f5cefd30899 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -91,7 +91,6 @@ public Integer getValue() } }); - longestUnrepairedSec = Metrics.register(factory.createMetricName("LongestUnrepairedSec"), new Gauge() { public Integer getValue() diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java index e293945c9846..e97ce34e5a73 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java @@ -23,6 +23,9 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +/** + * AutoRepair 
metrics manager holding all the auto-repair related metrics. + */ public class AutoRepairMetricsManager { private static final Map metrics = new ConcurrentHashMap<>(); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 46e84e59f25b..bd656a46340d 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -63,6 +63,9 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +/** + * AutoRepair scheduler responsible for running different types of repairs. + */ public class AutoRepair { private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 80381af73139..19c3029011de 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -39,6 +39,9 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.FBUtilities; +/** + * Defines configurations for AutoRepair. + */ public class AutoRepairConfig implements Serializable { // Enable/Disable the auto-repair scheduler. 
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 58140bba33b6..a08fa08f350b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -55,7 +55,9 @@ import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; -// AutoRepairState represents the state of automated repair for a given repair type. +/** + * AutoRepairState represents the state of automated repair for a given repair type. + */ public abstract class AutoRepairState implements ProgressListener { protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index f4d09aa18d82..a8d8b4744484 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -38,6 +38,9 @@ import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; +/** + * An implementation that splits token ranges into a fixed number of subranges. 
+ */ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index 105b9f6d0ddd..b2de800f83e0 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -33,6 +33,9 @@ import static java.lang.String.format; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; +/** + * AutoRepair table parameters - used to define the auto-repair configuration for a table. + */ public final class AutoRepairParams { public enum Option diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 7bb1a6a54e6d..df295b26b232 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -198,8 +198,7 @@ public static Builder builder(TableParams params) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) .pendingDrop(params.pendingDrop) - .automatedRepair(params.autoRepair) - ; + .automatedRepair(params.autoRepair); } public Builder unbuild() diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 1f7004c31d48..1c96ab685d2c 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -36,6 +36,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; +/** + * Implement all the MBeans for AutoRepair. 
+ */ public class AutoRepairService implements AutoRepairServiceMBean { public static final String MBEAN_NAME = "org.apache.cassandra.db:type=AutoRepairService"; diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index be6cedba9329..40948f3711a5 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -20,6 +20,9 @@ import java.util.Set; +/** + * Defines all the MBeans exposed for AutoRepair. + */ public interface AutoRepairServiceMBean { /** diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 872aab37392f..a58f26c9aaf2 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2581,22 +2581,22 @@ public void setAutoRepairEnabled(String repairType, boolean enabled) autoRepairProxy.setAutoRepairEnabled(repairType, enabled); } - public void setRepairThreads(String repairType, int repairThreads) + public void setAutoRepairThreads(String repairType, int repairThreads) { autoRepairProxy.setRepairThreads(repairType, repairThreads); } - public void setRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) + public void setAutoRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) { autoRepairProxy.setRepairPriorityForHosts(repairType, commaSeparatedHostSet); } - public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet) + public void setAutoRepairForceRepairForHosts(String repairType, String commaSeparatedHostSet) { autoRepairProxy.setForceRepairForHosts(repairType, commaSeparatedHostSet); } - public void setRepairMinInterval(String repairType, String minRepairInterval) + public void setAutoRepairMinInterval(String repairType, String minRepairInterval) { 
autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); } @@ -2606,7 +2606,7 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); } - public void startScheduler() + public void startAutoRepairScheduler() { autoRepairProxy.startScheduler(); } @@ -2626,7 +2626,7 @@ public void setAutoRepairMinRepairTaskDuration(String duration) autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); } - public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold) + public void setAutoRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold) { autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); } @@ -2641,22 +2641,22 @@ public void setAutoRepairIgnoreDCs(String repairType, Set ignoreDCs) autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); } - public void setParallelRepairPercentage(String repairType, int percentage) + public void setAutoRepairParallelRepairPercentage(String repairType, int percentage) { autoRepairProxy.setParallelRepairPercentage(repairType, percentage); } - public void setParallelRepairCount(String repairType, int count) + public void setAutoRepairParallelRepairCount(String repairType, int count) { autoRepairProxy.setParallelRepairCount(repairType, count); } - public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) + public void setAutoRepairPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) { autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); } - public void setMaterializedViewRepairEnabled(String repairType, boolean enabled) + public void setAutoRepairMaterializedViewRepairEnabled(String repairType, boolean enabled) { autoRepairProxy.setMVRepairEnabled(repairType, enabled); } @@ -2666,17 +2666,17 @@ public List mutateSSTableRepairedState(boolean repair, boolean 
preview, return ssProxy.mutateSSTableRepairedState(repair, preview, keyspace, tables); } - public List getTablesForKeyspace(String keyspace) + public List getAutoRepairTablesForKeyspace(String keyspace) { return ssProxy.getTablesForKeyspace(keyspace); } - public void setRepairSessionTimeout(String repairType, String timeout) + public void setAutoRepairSessionTimeout(String repairType, String timeout) { autoRepairProxy.setRepairSessionTimeout(repairType, timeout); } - public Set getOnGoingRepairHostIds(String repairType) + public Set getAutoRepairOnGoingRepairHostIds(String repairType) { return autoRepairProxy.getOnGoingRepairHostIds(repairType); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java index 22f9afc43fe1..bb594a010ff1 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -31,6 +31,9 @@ import static com.google.common.base.Preconditions.checkArgument; +/** + * Provides currently running auto-repair tasks. 
+ */ @Command(name = "autorepairstatus", description = "Print autorepair status") public class AutoRepairStatus extends NodeTool.NodeToolCmd { @@ -52,7 +55,7 @@ public void execute(NodeProbe probe) TableBuilder table = new TableBuilder(); table.add("Active Repairs"); - Set ongoingRepairHostIds = probe.getOnGoingRepairHostIds(repairType); + Set ongoingRepairHostIds = probe.getAutoRepairOnGoingRepairHostIds(repairType); table.add(getSetString(ongoingRepairHostIds)); table.printTo(out); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java index 961fb71fb4e4..9744498de757 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -25,6 +25,9 @@ import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; +/** + * Prints all the configurations for AutoRepair through nodetool. + */ @Command(name = "getautorepairconfig", description = "Print autorepair configurations") public class GetAutoRepairConfig extends NodeToolCmd { diff --git a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java index e1773fe47b7f..2a7b56732ac9 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java @@ -31,6 +31,9 @@ import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool; +/** + * Provides a way to set the repaired state of SSTables without any downtime through nodetool. 
+ */ @Command(name = "sstablerepairedset", description = "Set the repaired state of SSTables for given keyspace/tables") public class SSTableRepairedSet extends NodeTool.NodeToolCmd { @@ -87,7 +90,7 @@ public void execute(NodeProbe probe) { sstableList.addAll(probe.mutateSSTableRepairedState(isRepaired, !reallySet, keyspace, tables.isEmpty() - ? probe.getTablesForKeyspace(keyspace) // mutate all tables + ? probe.getAutoRepairTablesForKeyspace(keyspace) // mutate all tables : tables)); // mutate specific tables } catch (InvalidRequestException e) diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index d55685e72426..9f22a43c4eb9 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -35,6 +35,9 @@ import static com.google.common.base.Preconditions.checkArgument; +/** + * Allows to set AutoRepair configuration through nodetool. 
+ */ @Command(name = "setautorepairconfig", description = "sets the autorepair configuration") public class SetAutoRepairConfig extends NodeToolCmd { @@ -77,7 +80,7 @@ public void execute(NodeProbe probe) case "start_scheduler": if (Boolean.parseBoolean(paramVal)) { - probe.startScheduler(); + probe.startAutoRepairScheduler(); } return; case "history_clear_delete_hosts_buffer_interval": @@ -113,13 +116,13 @@ public void execute(NodeProbe probe) probe.setAutoRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "number_of_repair_threads": - probe.setRepairThreads(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairThreads(repairTypeStr, Integer.parseInt(paramVal)); break; case "min_repair_interval": - probe.setRepairMinInterval(repairTypeStr, paramVal); + probe.setAutoRepairMinInterval(repairTypeStr, paramVal); break; case "sstable_upper_threshold": - probe.setRepairSSTableCountHigherThreshold(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairSSTableCountHigherThreshold(repairTypeStr, Integer.parseInt(paramVal)); break; case "table_max_repair_time": probe.setAutoRepairTableMaxRepairTime(repairTypeStr, paramVal); @@ -127,11 +130,11 @@ public void execute(NodeProbe probe) case "priority_hosts": if (paramVal!= null && !paramVal.isEmpty()) { - probe.setRepairPriorityForHosts(repairTypeStr, paramVal); + probe.setAutoRepairPriorityForHosts(repairTypeStr, paramVal); } break; case "forcerepair_hosts": - probe.setForceRepairForHosts(repairTypeStr, paramVal); + probe.setAutoRepairForceRepairForHosts(repairTypeStr, paramVal); break; case "ignore_dcs": Set ignoreDCs = new HashSet<>(); @@ -142,19 +145,19 @@ public void execute(NodeProbe probe) probe.setAutoRepairIgnoreDCs(repairTypeStr, ignoreDCs); break; case "repair_primary_token_range_only": - probe.setPrimaryTokenRangeOnly(repairTypeStr, Boolean.parseBoolean(paramVal)); + probe.setAutoRepairPrimaryTokenRangeOnly(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case 
"parallel_repair_count": - probe.setParallelRepairCount(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairParallelRepairCount(repairTypeStr, Integer.parseInt(paramVal)); break; case "parallel_repair_percentage": - probe.setParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); + probe.setAutoRepairParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); break; case "materialized_view_repair_enabled": - probe.setMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); + probe.setAutoRepairMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; case "repair_session_timeout": - probe.setRepairSessionTimeout(repairTypeStr, paramVal); + probe.setAutoRepairSessionTimeout(repairTypeStr, paramVal); break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 67ba2b6009f1..39523892d6b2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -42,6 +42,9 @@ import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.junit.Assert.assertEquals; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ public class AutoRepairSchedulerTest extends TestBaseImpl { diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 4b2486255adc..2bd1ae43116f 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -25,8 +25,8 @@ import java.io.IOError; import java.io.IOException; import java.io.InputStream; -import java.math.BigInteger; import java.lang.reflect.Field; 
+import java.math.BigInteger; import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java index 1b66086a4b71..8bf96329bacb 100644 --- a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java @@ -37,6 +37,9 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.db.streaming.CassandraStreamReceiver} + */ public class CassandraStreamReceiverTest extends CQLTester { @Mock diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java index be14b8b912ea..ec36bb5cfd0c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.repair.autorepair; - import org.junit.Assert; import org.junit.Test; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 3bef45b8bd53..29e564cc3e12 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -44,6 +44,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig} + */ @RunWith(Parameterized.class) public class AutoRepairConfigTest extends CQLTester { diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 1337cf3dd2d3..241b1ac0de04 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -29,6 +29,9 @@ import org.apache.cassandra.config.DatabaseDescriptor; +/** + * Unit tests for {@link org.apache.cassandra.schema.SystemDistributedKeyspace} + */ public class AutoRepairKeyspaceTest { @BeforeClass diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 9e80bae37e59..d9471a5b7aef 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -78,6 +78,9 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ @RunWith(Parameterized.class) public class AutoRepairParameterizedTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java index 15d62c536a06..97e80364eed7 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -26,6 +26,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType} + */ public class AutoRepairStateFactoryTest { @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 63e1ee4aaa0d..b4bb20c7eafd 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -49,6 +49,9 @@ import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairState} + */ @RunWith(Parameterized.class) public class AutoRepairStateTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index 0accb33b0d37..caae31c69db1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -42,6 +42,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ public class AutoRepairTest extends CQLTester { @BeforeClass diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 1f64638bd734..79b1ef8096df 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -62,6 +62,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairUtils} + */ public class AutoRepairUtilsTest extends CQLTester { static RepairType repairType = RepairType.INCREMENTAL; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java 
b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 360a270908e5..6326f55f9d5c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -51,6 +51,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter} + */ @RunWith(Parameterized.class) public class FixedSplitTokenRangeSplitterTest { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java index 7b95acb879de..3c4efc25ebab 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -30,6 +30,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.PrioritizedRepairPlan} + */ public class PrioritizedRepairPlanTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index d1f598682326..2470527c401a 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -57,6 +57,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter} + */ @RunWith(Parameterized.class) public class RepairTokenRangeSplitterTest extends CQLTester { diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index 8c0588cb1244..26727f1b30b6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -44,6 +44,9 @@ import static org.junit.Assert.fail; +/** + * Unit tests to cover AutoRepair functionality inside {@link org.apache.cassandra.service.StorageService} + */ public class SSTableRepairedAtTest extends CQLTester { public static final String TEST_KEYSPACE = "test_keyspace"; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 358a9af78f86..b08e83a667d0 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -28,6 +28,9 @@ import static org.junit.Assert.assertEquals; +/** + * Unit tests for {@link org.apache.cassandra.service.AutoRepairService} + */ public class AutoRepairServiceBasicTest extends CQLTester { private static AutoRepairService autoRepairService; private static AutoRepairConfig config; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index fccac4762cb3..9288468b0019 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -37,6 +37,9 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.junit.Assert.assertEquals; +/** + * Unit tests covering different repair types for {@link org.apache.cassandra.service.AutoRepairService} + */ @RunWith(Parameterized.class) public class 
AutoRepairServiceRepairTypeTest extends CQLTester { @Parameterized.Parameter() diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java index d3a99590de07..db87e995f558 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -48,6 +48,9 @@ import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.assertj.core.api.Assertions.assertThat; +/** + * Unit tests for (updating parameters through JMX) {@link org.apache.cassandra.service.AutoRepairService} + */ @RunWith(Parameterized.class) public class AutoRepairServiceSetterTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java index 1c7b04d50bcb..82293581d807 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -40,6 +40,9 @@ import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.AutoRepairStatus} + */ @RunWith(Parameterized.class) public class AutoRepairStatusTest { @@ -94,7 +97,7 @@ public void testExecuteWithNoNodes() @Test public void testExecute() { - when(probe.getOnGoingRepairHostIds(repairType.name())).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); + when(probe.getAutoRepairOnGoingRepairHostIds(repairType.name())).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); cmd.repairType = repairType.name(); cmd.execute(probe); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java index bc168c0752d7..edd57447ad13 100644 
--- a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -39,6 +39,9 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.SSTableRepairedSet} + */ public class SSTableRepairedSetTest { @Mock @@ -58,8 +61,8 @@ public void setUp() { public void testNoKeyspace() { when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); - when(probe.getTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); - when(probe.getTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(Arrays.asList("table3", "table4"))); + when(probe.getAutoRepairTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); + when(probe.getAutoRepairTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(Arrays.asList("table3", "table4"))); cmd.isRepaired = true; cmd.reallySet = true; diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 335750a2f2b3..fab128fd299c 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -45,6 +45,9 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.SetAutoRepairConfig} + */ @RunWith(Suite.class) @Suite.SuiteClasses({ SetAutoRepairConfigTest.NoParamTests.class, SetAutoRepairConfigTest.RepairTypeParamTests.class, SetAutoRepairConfigTest.RepairTypeAndArgsParamsTests.class }) @@ -119,13 +122,13 @@ public void testStartScheduler() cmd.execute(probe); - verify(probe, 
times(0)).startScheduler(); + verify(probe, times(0)).startAutoRepairScheduler(); cmd.args = ImmutableList.of("start_scheduler", "true"); cmd.execute(probe); - verify(probe, times(1)).startScheduler(); + verify(probe, times(1)).startAutoRepairScheduler(); } @Test @@ -186,7 +189,7 @@ public void testRepairSchedulingDisabled() cmd.execute(probe); verify(out, times(1)).println("Auto-repair is not enabled"); - verify(probe, times(0)).setRepairThreads(repairType.name(), 1); + verify(probe, times(0)).setAutoRepairThreads(repairType.name(), 1); } @Test @@ -197,7 +200,7 @@ public void testRepairTypeDisabled() cmd.execute(probe); - verify(probe, times(1)).setRepairThreads(repairType.name(), 1); + verify(probe, times(1)).setAutoRepairThreads(repairType.name(), 1); } @@ -218,7 +221,7 @@ public void testV2FlagMissing() // expected } - verify(probe, times(0)).setRepairThreads(repairType.name(), 0); + verify(probe, times(0)).setAutoRepairThreads(repairType.name(), 0); } @Test(expected = IllegalArgumentException.class) @@ -239,7 +242,7 @@ public void testPriorityHosts() cmd.execute(probe); - verify(probe, times(1)).setRepairPriorityForHosts(repairType.name(), commaSeparatedHostSet); + verify(probe, times(1)).setAutoRepairPriorityForHosts(repairType.name(), commaSeparatedHostSet); } @Test @@ -251,7 +254,7 @@ public void testForceRepairHosts() cmd.execute(probe); - verify(probe, times(1)).setForceRepairForHosts(repairType.name(), commaSeparatedHostSet); + verify(probe, times(1)).setAutoRepairForceRepairForHosts(repairType.name(), commaSeparatedHostSet); } } @@ -275,14 +278,14 @@ public static Collection testCases() { return Stream.of( forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type.name(), true)), - forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setRepairThreads(type.name(), 1)), - forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setRepairMinInterval(type.name(), 
"3h")), - forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setRepairSSTableCountHigherThreshold(type.name(), 4)), + forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setAutoRepairThreads(type.name(), 1)), + forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setAutoRepairMinInterval(type.name(), "3h")), + forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setAutoRepairSSTableCountHigherThreshold(type.name(), 4)), forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type.name(), "5s")), - forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setPrimaryTokenRangeOnly(type.name(), true)), - forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setParallelRepairCount(type.name(), 6)), - forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setParallelRepairPercentage(type.name(), 7)), - forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setMaterializedViewRepairEnabled(type.name(), true)), + forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setAutoRepairPrimaryTokenRangeOnly(type.name(), true)), + forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairCount(type.name(), 6)), + forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairPercentage(type.name(), 7)), + forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairMaterializedViewRepairEnabled(type.name(), true)), forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type.name(), ImmutableSet.of("dc1", "dc2"))), 
forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")) ).flatMap(Function.identity()).collect(Collectors.toList()); From 2c96a67480217a50eceb8afd2e888f1efaa45e6b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 11 Feb 2025 15:34:20 -0800 Subject: [PATCH 198/257] Fix review comments from Andy --- src/java/org/apache/cassandra/config/ParameterizedClass.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/java/org/apache/cassandra/config/ParameterizedClass.java b/src/java/org/apache/cassandra/config/ParameterizedClass.java index 24f7307057e3..d772629f194c 100644 --- a/src/java/org/apache/cassandra/config/ParameterizedClass.java +++ b/src/java/org/apache/cassandra/config/ParameterizedClass.java @@ -20,7 +20,6 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.Collections; -import java.io.Serializable; import java.util.List; import java.util.Map; import java.util.function.Predicate; From a292b045f19987a7cf0641f46ffab4b1b3dd8902 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 11 Feb 2025 17:04:09 -0800 Subject: [PATCH 199/257] Additional review comments from Jaydeepkumar --- .../cassandra/metrics/AutoRepairMetrics.java | 21 +++++++++---------- .../repair/autorepair/AutoRepairConfig.java | 4 ++-- .../repair/autorepair/AutoRepairState.java | 4 ---- .../repair/autorepair/AutoRepairUtils.java | 11 ---------- .../FixedSplitTokenRangeSplitter.java | 3 --- .../autorepair/RepairAssignmentIterator.java | 1 - .../cassandra/schema/AutoRepairParams.java | 3 +-- .../cassandra/schema/SchemaKeyspace.java | 4 +--- .../schema/SystemDistributedKeyspace.java | 1 - .../apache/cassandra/schema/TableParams.java | 3 +-- .../service/AutoRepairServiceMBean.java | 3 --- .../org/apache/cassandra/tools/NodeProbe.java | 1 - .../apache/cassandra/utils/FBUtilities.java | 2 +- 
.../test/repair/AutoRepairSchedulerTest.java | 1 - .../config/YamlConfigurationLoaderTest.java | 10 ++++----- .../autorepair/AutoRepairConfigTest.java | 2 +- .../autorepair/AutoRepairKeyspaceTest.java | 4 +++- .../AutoRepairParameterizedTest.java | 1 - .../autorepair/AutoRepairUtilsTest.java | 4 +--- .../autorepair/PrioritizedRepairPlanTest.java | 2 -- .../RepairTokenRangeSplitterTest.java | 1 - .../autorepair/SSTableRepairedAtTest.java | 1 + .../service/AutoRepairServiceBasicTest.java | 2 -- .../AutoRepairServiceRepairTypeTest.java | 1 - .../nodetool/SetAutoRepairConfigTest.java | 1 - 25 files changed, 27 insertions(+), 64 deletions(-) diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index 0f5cefd30899..ee8a0abfea70 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -33,20 +33,19 @@ public class AutoRepairMetrics { public static final String TYPE_NAME = "autorepair"; - public Gauge repairsInProgress; - public Gauge nodeRepairTimeInSec; - public Gauge clusterRepairTimeInSec; - public Gauge longestUnrepairedSec; - public Gauge succeededTokenRangesCount; - public Gauge failedTokenRangesCount; - public Gauge skippedTokenRangesCount; - public Gauge skippedTablesCount; + public final Gauge repairsInProgress; + public final Gauge nodeRepairTimeInSec; + public final Gauge clusterRepairTimeInSec; + public final Gauge longestUnrepairedSec; + public final Gauge succeededTokenRangesCount; + public final Gauge failedTokenRangesCount; + public final Gauge skippedTokenRangesCount; + public final Gauge skippedTablesCount; + public final Gauge totalMVTablesConsideredForRepair; + public final Gauge totalDisabledRepairTables; public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; - public Gauge totalMVTablesConsideredForRepair; - public 
Gauge totalDisabledRepairTables; - public AutoRepairMetrics(RepairType repairType) { AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 19c3029011de..069a7378fa16 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -453,8 +453,8 @@ protected static Options getDefaultOptions() opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); opts.materialized_view_repair_enabled = false; opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); - opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); // 5 minutes - opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); // 3 hours + opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); + opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); return opts; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index a08fa08f350b..972543784032 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -83,17 +83,14 @@ public abstract class AutoRepairState implements ProgressListener protected int totalMVTablesConsideredForRepair = 0; @VisibleForTesting protected int totalDisabledTablesRepairCount = 0; - @VisibleForTesting protected int failedTokenRangesCount = 0; @VisibleForTesting protected int succeededTokenRangesCount = 0; @VisibleForTesting protected int skippedTokenRangesCount = 0; - @VisibleForTesting protected int skippedTablesCount = 0; - @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; 
@VisibleForTesting @@ -114,7 +111,6 @@ protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption opti { RepairCoordinator task = new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace); - task.addProgressListener(this); return task; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index f3f33e2c7a14..19f0558bea64 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -136,7 +136,6 @@ public class AutoRepairUtils COL_REPAIR_TYPE, COL_HOST_ID); final static String RECORD_FINISH_REPAIR_HISTORY = String.format( - "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); @@ -183,10 +182,8 @@ public static void setup() .forInternalCalls()); selectStatementRepairPriority = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_PRIORITY, ClientState .forInternalCalls()); - selectLastRepairTimeForNode = (SelectStatement) QueryProcessor.getStatement(SELECT_LAST_REPAIR_TIME_FOR_NODE, ClientState .forInternalCalls()); - delStatementPriorityStatus = (ModificationStatement) QueryProcessor.getStatement(DEL_REPAIR_PRIORITY, ClientState .forInternalCalls()); addPriorityHost = (ModificationStatement) QueryProcessor.getStatement(ADD_PRIORITY_HOST, ClientState @@ -390,14 +387,11 @@ public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) ByteBufferUtil.bytes(repairType.toString()), ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); - UntypedResultSet repairTime = UntypedResultSet.create(rows.result); - if (repairTime.isEmpty()) { return 0; } - return 
repairTime.one().getLong(COL_REPAIR_FINISH_TS); } @@ -562,11 +556,9 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, autoRepairHistories == null ? 0 : autoRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); - if (currentRepairStatus == null || parallelRepairNumber > currentRepairStatus.hostIdsWithOnGoingRepair.size()) { // more repairs can be run, I might be the new one - if (autoRepairHistories != null) { logger.info("Auto repair history table has {} records", autoRepairHistories.size()); @@ -608,13 +600,11 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) "First node in priority list is {}", ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(priorityHostId))); return NOT_MY_TURN; } - if (myId.equals(priorityHostId)) { //I have a priority for repair hence its my turn now return MY_TURN_DUE_TO_PRIORITY; } - // get the longest unrepaired node from the nodes which are not running repair AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); //check who is next, which is helpful for debugging @@ -815,7 +805,6 @@ public static boolean shouldConsiderKeyspace(Keyspace ks) return repair; } - public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long startTime) { long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index a8d8b4744484..e27b8188bd2f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -77,7 +77,6 @@ public Iterator getRepairAssignments(boolean primaryR { return new 
RepairAssignmentIterator(repairPlans) { - @Override protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) { @@ -127,7 +126,6 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primar } } } - return new KeyspaceRepairAssignments(priority, keyspaceName, repairAssignments); } @@ -138,7 +136,6 @@ public void setParameter(String key, String value) { throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be " + NUMBER_OF_SUBRANGES); } - logger.info("Setting {} to {} for repair type {}", key, value, repairType); this.numberOfSubranges = Integer.parseInt(value); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java index fc5573a48944..44d9f5ef5e55 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java @@ -51,7 +51,6 @@ private synchronized Iterator currentIterator() currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); } } - return currentIterator; } diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index b2de800f83e0..a1111a84c51a 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -52,7 +52,7 @@ public String toString() } } - private ImmutableMap options; + private final ImmutableMap options; public static final Map DEFAULT_OPTIONS = ImmutableMap.of( LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.name()), Boolean.toString(true), @@ -152,7 +152,6 @@ public static boolean isValidInt(String value) return StringUtils.isNumeric(value); } - public Map options() { return options; diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java 
b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index f7fe80d83d4c..21fad889d108 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -598,7 +598,6 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("extensions", params.extensions) .add("auto_repair", params.autoRepair.asMap()); - // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ // node sends table schema to a < 3.8 versioned node with an unknown column. if (DatabaseDescriptor.isCDCEnabled()) @@ -1087,8 +1086,7 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) .fastPath(getFastPathStrategy(row)) - .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))) -; + .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index f9c023e33b32..a1d03bbf8e03 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -184,7 +184,6 @@ private SystemDistributedKeyspace() private static final TableMetadata AutoRepairPriorityTable = parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", AUTO_REPAIR_PRIORITY_CQL).build(); - private static TableMetadata.Builder parse(String table, String description, String cql) { return CreateTableStatement.parse(format(cql, table), SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 
df295b26b232..ffdfcccedf2d 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -101,8 +101,7 @@ public enum Option TRANSACTIONAL_MODE, TRANSACTIONAL_MIGRATION_FROM, PENDING_DROP, - AUTO_REPAIR, - ; + AUTO_REPAIR; @Override public String toString() diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 40948f3711a5..f3ec919c8f1b 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -25,9 +25,6 @@ */ public interface AutoRepairServiceMBean { - /** - * Enable or disable auto-repair for a given repair type - */ public void setAutoRepairEnabled(String repairType, boolean enabled); public void setRepairThreads(String repairType, int repairThreads); diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index a58f26c9aaf2..cab5153a70d3 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -182,7 +182,6 @@ public class NodeProbe implements AutoCloseable protected PermissionsCacheMBean pcProxy; protected RolesCacheMBean rcProxy; protected AutoRepairServiceMBean autoRepairProxy; - protected Output output; private boolean failed; diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index ed44b84164f1..fd7d24c1e502 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -65,7 +65,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; - +import com.vdurmont.semver4j.Semver; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 39523892d6b2..c6fa37f54dd7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -47,7 +47,6 @@ */ public class AutoRepairSchedulerTest extends TestBaseImpl { - private static Cluster cluster; static SimpleDateFormat sdf; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 10075671e589..ace51ebe2ce8 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -278,11 +278,11 @@ public void fromMapTest() "optional", false, "enabled", true); Map autoRepairConfig = ImmutableMap.of("enabled", true, - "global_settings", ImmutableMap.of("number_of_repair_threads", - 1), - "repair_type_overrides", ImmutableMap.of( - "full", ImmutableMap.of("number_of_repair_threads", - 2))); + "global_settings", + ImmutableMap.of("number_of_repair_threads", 1), + "repair_type_overrides", + ImmutableMap.of("full", + ImmutableMap.of("number_of_repair_threads", 2))); Map map = new ImmutableMap.Builder() .put("storage_port", storagePort) .put("commitlog_sync", commitLogSync) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 29e564cc3e12..e8c60f2d9b50 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -52,7 +52,7 @@ public class AutoRepairConfigTest extends CQLTester { 
private AutoRepairConfig config; - private Set testSet = ImmutableSet.of("dc1"); + private final Set testSet = ImmutableSet.of("dc1"); @Parameterized.Parameter public AutoRepairConfig.RepairType repairType; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java index 241b1ac0de04..ac9dac8236e8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -22,7 +22,9 @@ import java.util.Iterator; import java.util.Set; -import org.apache.cassandra.schema.*; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.schema.TableMetadata; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index d9471a5b7aef..cca5513e3712 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -728,7 +728,6 @@ public void testRepairThrowsForIRWithMVReplay() } } - @Test public void testRepairThrowsForIRWithCDCReplay() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index 79b1ef8096df..d79f0cb6b4c8 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -67,7 +67,7 @@ */ public class AutoRepairUtilsTest extends CQLTester { - static RepairType repairType = RepairType.INCREMENTAL; + static final RepairType repairType = 
RepairType.INCREMENTAL; static UUID hostId; static InetAddressAndPort localEndpoint; @@ -132,7 +132,6 @@ public void testSetForceRepairNewNode() assertTrue(result.one().getBoolean(COL_FORCE_REPAIR)); } - @Test public void testClearDeleteHosts() { @@ -262,7 +261,6 @@ public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() assertEquals(2, count); } - @Test public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java index 3c4efc25ebab..38f9c8538846 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -35,7 +35,6 @@ */ public class PrioritizedRepairPlanTest extends CQLTester { - @Test public void testBuildWithDifferentPriorities() { @@ -162,5 +161,4 @@ public void testBuildWithOneTable() assertEquals(5, prioritizedRepairPlans.get(0).getPriority()); assertEquals(table1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); } - } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 2470527c401a..7da664e13c91 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -377,7 +377,6 @@ private SizeEstimate sizeEstimateByBytes(LongMebibytesBound sizeInRange, LongMeb return new SizeEstimate(RepairType.INCREMENTAL, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); } - private void insertAndFlushSingleTable() throws Throwable { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java index 26727f1b30b6..14677490ca2d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -78,6 +78,7 @@ public void clearData() table2 = Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table2"); assert table2 != null; } + @Test public void testGetTablesForKeyspace() { diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index b08e83a667d0..821a39d9e1e6 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -67,7 +67,6 @@ public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); } - @Test public void testsetAutoRepairMaxRetriesCount() { autoRepairService.setAutoRepairMaxRetriesCount(101); @@ -75,7 +74,6 @@ public void testsetAutoRepairMaxRetriesCount() { assertEquals(101, config.getRepairMaxRetries()); } - @Test public void testsetAutoRepairRetryBackoffInSec() { autoRepairService.setAutoRepairRetryBackoff("102s"); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index 9288468b0019..f2ee61930b09 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -55,7 +55,6 @@ public static Collection repairTypes() { return Arrays.asList(AutoRepairConfig.RepairType.values()); } - @BeforeClass public static void setupClass() throws Exception { 
SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index fab128fd299c..5a32c8285d48 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -203,7 +203,6 @@ public void testRepairTypeDisabled() verify(probe, times(1)).setAutoRepairThreads(repairType.name(), 1); } - @Test public void testV2FlagMissing() { From c4f40637ee5f3cb54ffcaaffcde795da503fc26f Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 13 Feb 2025 11:32:43 -0800 Subject: [PATCH 200/257] Additional review comments from Andy --- .../service/AutoRepairServiceBasicTest.java | 42 ++++++++++++------- .../AutoRepairServiceRepairTypeTest.java | 17 +++++--- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index 821a39d9e1e6..bd6fcd608ba3 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -31,12 +31,14 @@ /** * Unit tests for {@link org.apache.cassandra.service.AutoRepairService} */ -public class AutoRepairServiceBasicTest extends CQLTester { +public class AutoRepairServiceBasicTest extends CQLTester +{ private static AutoRepairService autoRepairService; private static AutoRepairConfig config; @Before - public void setUp() { + public void setUp() + { DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsEnabled(false); @@ -47,7 +49,8 @@ public void setUp() { } @Test - public void testSetup() { + public void testSetup() + { AutoRepairService.instance.config = null; 
AutoRepairService.setup(); @@ -56,40 +59,46 @@ public void testSetup() { } @Test - public void testGetAutoRepairConfigReturnsConfig() { + public void testGetAutoRepairConfigReturnsConfig() + { assertEquals(config, autoRepairService.getAutoRepairConfig()); } @Test - public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() { + public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() + { autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); } @Test - public void testsetAutoRepairMaxRetriesCount() { + public void testsetAutoRepairMaxRetriesCount() + { autoRepairService.setAutoRepairMaxRetriesCount(101); assertEquals(101, config.getRepairMaxRetries()); } @Test - public void testsetAutoRepairRetryBackoffInSec() { + public void testsetAutoRepairRetryBackoffInSec() + { autoRepairService.setAutoRepairRetryBackoff("102s"); assertEquals(102, config.getRepairRetryBackoff().toSeconds()); } @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() { + public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() + { autoRepairService.config = new AutoRepairConfig(false); autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisabled() + { autoRepairService.config = new AutoRepairConfig(true); autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); @@ -97,7 +106,8 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisa } @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { + 
public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() + { autoRepairService.config = new AutoRepairConfig(true); autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); @@ -105,7 +115,8 @@ public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() { } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setMaterializedViewsEnabled(true); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); @@ -113,7 +124,8 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() { } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(false); @@ -121,7 +133,8 @@ public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled } @Test(expected = ConfigurationException.class) - public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { + public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); @@ -129,7 +142,8 @@ public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() { } @Test - public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() { + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() + { autoRepairService.config = new AutoRepairConfig(true); DatabaseDescriptor.setCDCEnabled(true); 
autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java index f2ee61930b09..9c2af3e1c793 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -19,9 +19,11 @@ package org.apache.cassandra.service; import com.google.common.collect.ImmutableSet; + import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.repair.autorepair.AutoRepairUtils; + import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -41,7 +43,8 @@ * Unit tests covering different repair types for {@link org.apache.cassandra.service.AutoRepairService} */ @RunWith(Parameterized.class) -public class AutoRepairServiceRepairTypeTest extends CQLTester { +public class AutoRepairServiceRepairTypeTest extends CQLTester +{ @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -51,25 +54,29 @@ public class AutoRepairServiceRepairTypeTest extends CQLTester { private AutoRepairService instance; @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() { + public static Collection repairTypes() + { return Arrays.asList(AutoRepairConfig.RepairType.values()); } @BeforeClass - public static void setupClass() throws Exception { + public static void setupClass() throws Exception + { SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); setAutoRepairEnabled(true); requireNetwork(); } @Before - public void setUpTest() { + public void setUpTest() + { AutoRepairUtils.setup(); instance = new AutoRepairService(); } @Test - public void testGetOnGoingRepairHostIdsTest() { + public void testGetOnGoingRepairHostIdsTest() + { long now = System.currentTimeMillis(); 
AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); From 2290b217b991d75266e24403ec9b6033198f1e11 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 13 Feb 2025 12:05:15 -0800 Subject: [PATCH 201/257] Additional review comments from Jaydeepkumar --- NEWS.txt | 5 +++++ .../config/YamlConfigurationLoaderTest.java | 2 +- .../autorepair/RepairTokenRangeSplitterTest.java | 15 ++++++++------- .../service/ActiveRepairServiceTest.java | 1 - 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/NEWS.txt b/NEWS.txt index b35a3c02745b..5cb99edcab73 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -72,6 +72,11 @@ using the provided 'sstableupgrade' tool. New features ------------ [The following is a placeholder, to be revised asap] + - CEP-37 Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This + significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit + and manage repairs. See + https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution for more + details on the motivation and design - CEP-21 Transactional Cluster Metadata introduces a distributed log for linearizing modifications to cluster metadata. In the first instance, this encompasses cluster membership, token ownership and schema metadata. 
See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-21%3A+Transactional+Cluster+Metadata for more detail on diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index ace51ebe2ce8..68d5302047be 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -302,7 +302,7 @@ public void fromMapTest() assertEquals(true, config.client_encryption_options.enabled); // Check a nested object assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) - assertEquals(true, config.auto_repair.enabled); + assertTrue(config.auto_repair.enabled); assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.INCREMENTAL)); config.auto_repair.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index 7da664e13c91..deea6a33f1a3 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -126,8 +126,8 @@ public void testSizePartitionCountSplit() throws Throwable Range tokenRange2 = range.next(); Assert.assertFalse(range.hasNext()); - try(Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); - Refs sstables2 = 
RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) + try (Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) { SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); @@ -330,23 +330,23 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte FilteredRepairAssignments filteredRepairAssignments = repairRangeSplitter.filterRepairAssignments(0, KEYSPACE, assignments, 0); List finalRepairAssignments = filteredRepairAssignments.repairAssignments; assertEquals(2, finalRepairAssignments.size()); - assertEquals(expectedBytes*2, filteredRepairAssignments.newBytesSoFar); + assertEquals(expectedBytes * 2, filteredRepairAssignments.newBytesSoFar); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowUnknownParameter() { repairRangeSplitter.setParameter("unknown", "x"); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowSettingBytesPerAssignmentGreaterThanMaxBytesPerSchedule() { repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "500GiB"); repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "600GiB"); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowSettingMaxBytesPerScheduleLessThanBytesPerAssignment() { repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "100MiB"); @@ -386,7 +386,8 @@ private void insertAndFlushSingleTable() 
throws Throwable private List createAndInsertTables(int count) throws Throwable { List tableNames = new ArrayList<>(); - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) + { String tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); tableNames.add(tableName); insertAndFlushTable(tableName); diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index cb90cc725295..6ccfbcbfa971 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -247,7 +247,6 @@ public void testGetNeighborsSpecifiedHostsWithNoLocalHost() throws Throwable ActiveRepairService.instance().getNeighbors(KEYSPACE5, ranges, ranges.iterator().next(), null, hosts); } - @Test public void testParentRepairStatus() throws Throwable { From 908c9c162f95caf3aaa43755f4911389a1aa1a05 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 14 Feb 2025 12:37:37 -0800 Subject: [PATCH 202/257] Incorporate NEWS.txt review comment --- NEWS.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.txt b/NEWS.txt index 5cb99edcab73..2026fb09a762 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -76,7 +76,7 @@ New features significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and manage repairs. See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution for more - details on the motivation and design + details on the motivation and design. - CEP-21 Transactional Cluster Metadata introduces a distributed log for linearizing modifications to cluster metadata. In the first instance, this encompasses cluster membership, token ownership and schema metadata. 
See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-21%3A+Transactional+Cluster+Metadata for more detail on From c76349b9a1513083b20c063bf59a2cac606529c5 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 20 Feb 2025 16:21:10 -0800 Subject: [PATCH 203/257] A new table property should be wrapped inside a config --- pylib/cqlshlib/cql3handling.py | 3 + pylib/cqlshlib/test/test_cqlsh_completion.py | 6 +- .../repair/autorepair/AutoRepairConfig.java | 6 ++ .../cassandra/schema/SchemaKeyspace.java | 19 ++++- .../AutoRepairTablePropertyTest.java | 84 +++++++++++++++++++ 5 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index f1baf2ad2952..d2e8e0de63c7 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -56,6 +56,7 @@ class Cql3ParsingRuleSet(CqlParsingRuleSet): ('memtable_flush_period_in_ms', None), ('cdc', None), ('read_repair', None), + ('auto_repair', None), ) columnfamily_layout_map_options = ( @@ -559,6 +560,8 @@ def cf_prop_val_completer(ctxt, cass): return [Hint('')] if this_opt == 'incremental_backups': return [Hint('')] + if this_opt == 'auto_repair': + return ["{'full_enabled': '"] return [Hint('')] diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index bf2385a46fef..90cddf35bce1 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -678,7 +678,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) self.trycompletions(prefix + ' new_table (col_a 
int PRIMARY KEY) WITH ', choices=['allow_auto_snapshot', 'bloom_filter_fp_chance', 'compaction', @@ -690,7 +690,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance ', immediate='= ') self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance = ', @@ -740,7 +740,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) self.trycompletions(prefix + " new_table (col_a int PRIMARY KEY) WITH compaction = " + "{'class': 'TimeWindowCompactionStrategy', '", choices=['compaction_window_unit', 'compaction_window_size', diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 069a7378fa16..ae3a70476993 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -147,6 +147,12 @@ public boolean isAutoRepairSchedulingEnabled() return enabled; } + @VisibleForTesting + public void setAutoRepairSchedulingEnabled(boolean enabled) + { + this.enabled = enabled; + } + public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferInterval() { return history_clear_delete_hosts_buffer_interval; diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java 
b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 21fad889d108..7781ef29b9e6 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -595,8 +595,7 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui .add("compaction", params.compaction.asMap()) .add("compression", params.compression.asMap()) .add("read_repair", params.readRepair.toString()) - .add("extensions", params.extensions) - .add("auto_repair", params.autoRepair.asMap()); + .add("extensions", params.extensions); // Only add CDC-enabled flag to schema if it's enabled on the node. This is to work around RTE's post-8099 if a 3.8+ // node sends table schema to a < 3.8 versioned node with an unknown column. @@ -620,6 +619,13 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui if (DatabaseDescriptor.getAccordTransactionsEnabled() && !forView) builder.add("fast_path", params.fastPath.asMap()); + + // As above, only add the auto_repair column if the scheduler is enabled + // to avoid RTE in pre-5.1 versioned node during upgrades + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + builder.add("auto_repair", params.autoRepair.asMap()); + } } private static void addAlterTableToSchemaMutation(TableMetadata oldTable, TableMetadata newTable, Mutation.SimpleBuilder builder) @@ -1085,8 +1091,7 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) SpeculativeRetryPolicy.fromString("99PERCENTILE")) .cdc(row.has("cdc") && row.getBoolean("cdc")) .readRepair(getReadRepairStrategy(row)) - .fastPath(getFastPathStrategy(row)) - .automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); + .fastPath(getFastPathStrategy(row)); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) @@ -1096,6 +1101,12 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) 
if (row.has("incremental_backups")) builder.incrementalBackups(row.getBoolean("incremental_backups")); + // auto_repair column was introduced in 5.1 + if (row.has("auto_repair")) + { + builder.automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); + } + return builder.build(); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java new file mode 100644 index 000000000000..51551e46369d --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Map; + +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.SimpleBuilders; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaKeyspace; +import org.apache.cassandra.schema.SchemaKeyspaceTables; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +/** + * Unit tests that verifies "auto_repair" is not included in Schema mutation + * {@link org.apache.cassandra.schema.SchemaKeyspace} if AutoRepair is disabled + */ +public class AutoRepairTablePropertyTest extends CQLTester +{ + @Test + public void testSchedulerDisabledNoColumnReturned() + { + helperTestTableProperty(false); + } + + @Test + public void testSchedulerEnabledShouldReturnColumnReturned() + { + helperTestTableProperty(true); + } + + public void helperTestTableProperty(boolean autoRepairOn) + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(autoRepairOn); + + Map systemSchemaTables = Map.of(SchemaKeyspaceTables.TABLES, "table_name", SchemaKeyspaceTables.VIEWS, "view_name"); + for (Map.Entry systemSchema : systemSchemaTables.entrySet()) + { + ColumnFamilyStore tables = Keyspace.open(SchemaConstants.SCHEMA_KEYSPACE_NAME).getColumnFamilyStore(systemSchema.getKey()); + SimpleBuilders.RowBuilder builder = new SimpleBuilders.RowBuilder(tables.metadata(), systemSchema.getValue()); + SchemaKeyspace.addTableParamsToRowBuilder(tables.metadata().params, builder); + Row row = builder.build(); + ColumnMetadata autoRepair = 
tables.metadata().getColumn(ByteBufferUtil.bytes("auto_repair")); + ColumnData data = row.getCell(autoRepair); + if (autoRepairOn) + { + assertNotNull(data); + } + else + { + // if AutoRepair is not enabled, the column should not be returned + // as part of the system_schema.tables mutation + assertNull(data); + } + } + } +} From cce1fafdee3e41e69df792bfb55dd5e7c3bba422 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 21 Feb 2025 08:32:38 -0800 Subject: [PATCH 204/257] Skip displaying the table property if AutoRepair is disabled --- src/java/org/apache/cassandra/schema/TableParams.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index ffdfcccedf2d..c56ddd4ab41f 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -28,6 +28,7 @@ import com.google.common.collect.Maps; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.Attributes; import org.apache.cassandra.cql3.CqlBuilder; import org.apache.cassandra.exceptions.ConfigurationException; @@ -419,9 +420,12 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) .newLine(); } - builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()) - .newLine() - .append("AND auto_repair = ").append(autoRepair.asMap()); + builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + builder.newLine() + .append("AND auto_repair = ").append(autoRepair.asMap()); + } } public static final class Builder From 934892d59db4e0f932346d39e0478fa14b1bff7c Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 21 Feb 2025 09:46:59 -0800 Subject: [PATCH 
205/257] Fix broken tests --- pylib/cqlshlib/cql3handling.py | 3 --- pylib/cqlshlib/test/test_cqlsh_completion.py | 6 +++--- pylib/cqlshlib/test/test_cqlsh_output.py | 3 +-- .../cassandra/cql3/statements/DescribeStatementTest.java | 5 ++--- test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java | 3 +-- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index d2e8e0de63c7..f1baf2ad2952 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -56,7 +56,6 @@ class Cql3ParsingRuleSet(CqlParsingRuleSet): ('memtable_flush_period_in_ms', None), ('cdc', None), ('read_repair', None), - ('auto_repair', None), ) columnfamily_layout_map_options = ( @@ -560,8 +559,6 @@ def cf_prop_val_completer(ctxt, cass): return [Hint('')] if this_opt == 'incremental_backups': return [Hint('')] - if this_opt == 'auto_repair': - return ["{'full_enabled': '"] return [Hint('')] diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index 90cddf35bce1..bf2385a46fef 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -678,7 +678,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH ', choices=['allow_auto_snapshot', 'bloom_filter_fp_chance', 'compaction', @@ -690,7 +690,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) + 'min_index_interval', 
'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance ', immediate='= ') self.trycompletions(prefix + ' new_table (col_a int PRIMARY KEY) WITH bloom_filter_fp_chance = ', @@ -740,7 +740,7 @@ def create_columnfamily_table_template(self, name): 'memtable_flush_period_in_ms', 'CLUSTERING', 'COMPACT', 'caching', 'comment', - 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair', 'auto_repair']) + 'min_index_interval', 'speculative_retry', 'additional_write_policy', 'cdc', 'read_repair']) self.trycompletions(prefix + " new_table (col_a int PRIMARY KEY) WITH compaction = " + "{'class': 'TimeWindowCompactionStrategy', '", choices=['compaction_window_unit', 'compaction_window_size', diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py index ec400cf7d735..c32690b42496 100644 --- a/pylib/cqlshlib/test/test_cqlsh_output.py +++ b/pylib/cqlshlib/test/test_cqlsh_output.py @@ -701,8 +701,7 @@ def test_describe_columnfamily_output(self): AND read_repair = 'BLOCKING' AND transactional_mode = 'off' AND transactional_migration_from = 'none' - AND speculative_retry = '99p' - AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};""" % quote_name(get_keyspace())) + AND speculative_retry = '99p';""" % quote_name(get_keyspace())) with cqlsh_testrun(tty=True, env=self.default_env) as c: for cmdword in ('describe table', 'desc columnfamily'): diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 9f27ee04a7fe..f96a0bbc35f2 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1144,7 +1144,7 @@ private 
static String tableParametersCql() " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = '99p'\n" + - " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + " AND speculative_retry = '99p';"; } private static String cqlQuoted(Map map) @@ -1171,8 +1171,7 @@ private static String mvParametersCql() " AND memtable_flush_period_in_ms = 0\n" + " AND min_index_interval = 128\n" + " AND read_repair = 'BLOCKING'\n" + - " AND speculative_retry = '99p'\n" + - " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + " AND speculative_retry = '99p';"; } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index a328790cce9d..da5f6d1853cb 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -349,8 +349,7 @@ public void testCfmOptionsCQL() " AND read_repair = 'BLOCKING'\n" + " AND transactional_mode = 'off'\n" + " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = 'ALWAYS'\n" + - " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};" + " AND speculative_retry = 'ALWAYS';" )); } From e0b2c69e25cb561b71e821d653a4d7c92ec61b7d Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 21 Feb 2025 14:26:45 -0800 Subject: [PATCH 206/257] Extend DescribeStatementTest to cover properties when AutoRepair scheduler is enabled --- .../statements/DescribeStatementTest.java | 149 +++++++++++++----- 1 file changed, 107 insertions(+), 42 deletions(-) diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java 
b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index f96a0bbc35f2..a7cbf41a99fd 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -270,6 +270,19 @@ public void testDescribeVirtualTables() throws Throwable @Test public void testDescribe() throws Throwable + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(false); + helperTestDescribe(); + } + + @Test + public void testDescribeWithAutoRepair() throws Throwable + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(true); + helperTestDescribe(); + } + + public void helperTestDescribe() throws Throwable { try { @@ -833,7 +846,7 @@ public void testDescribeCreateLikeTable() throws Throwable requireNetwork(); DatabaseDescriptor.setDynamicDataMaskingEnabled(true); String souceTable = createTable(KEYSPACE_PER_TEST, - "CREATE TABLE %s (" + + "CREATE TABLE %s (" + " pk1 text, " + " pk2 int MASKED WITH DEFAULT, " + " ck1 int, " + @@ -1122,29 +1135,56 @@ private static String testTableOutput() private static String tableParametersCql() { - return "additional_write_policy = '99p'\n" + - " AND allow_auto_snapshot = true\n" + - " AND bloom_filter_fp_chance = 0.01\n" + - " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + - " AND cdc = false\n" + - " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + - " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + - " AND memtable = 'default'\n" + - " AND crc_check_chance = 1.0\n" + - " AND fast_path = 'keyspace'\n" + - " AND default_time_to_live = 0\n" + - " AND extensions = {}\n" + - " AND gc_grace_seconds = 864000\n" + - " AND incremental_backups = true\n" + - " AND max_index_interval = 2048\n" + - " AND memtable_flush_period_in_ms = 0\n" + - " AND min_index_interval = 
128\n" + - " AND read_repair = 'BLOCKING'\n" + - " AND transactional_mode = 'off'\n" + - " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = '99p'\n" + - " AND speculative_retry = '99p';"; + if (!DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND fast_path = 'keyspace'\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + + " AND speculative_retry = '99p';"; + } + else + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " 
AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + + " AND speculative_retry = '99p'\n" + + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + } } private static String cqlQuoted(Map map) @@ -1154,24 +1194,49 @@ private static String cqlQuoted(Map map) private static String mvParametersCql() { - return "additional_write_policy = '99p'\n" + - " AND allow_auto_snapshot = true\n" + - " AND bloom_filter_fp_chance = 0.01\n" + - " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + - " AND cdc = false\n" + - " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + - " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + - " AND memtable = 'default'\n" + - " AND crc_check_chance = 1.0\n" + - " AND extensions = {}\n" + - " AND gc_grace_seconds = 864000\n" + - " AND incremental_backups = true\n" + - " AND max_index_interval = 2048\n" + - " AND memtable_flush_period_in_ms = 0\n" + - " AND min_index_interval = 128\n" + - " AND read_repair = 'BLOCKING'\n" + - " AND speculative_retry = '99p';"; + if (!DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND extensions = {}\n" + + " AND 
gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p';"; + } + else + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p'\n" + + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + } } private static String keyspaceOutput() From 6cd50192a0f7020d93b2d6cda33ab3be0e0fe31c Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Thu, 30 Jan 2025 11:41:56 -0800 Subject: [PATCH 207/257] Fix race condition in auto-repair scheduler --- .../repair/autorepair/AutoRepair.java | 78 ++++++- .../repair/autorepair/AutoRepairState.java | 63 +----- .../AutoRepairParameterizedTest.java | 206 ++++++++++++++---- .../autorepair/AutoRepairStateTest.java | 101 +-------- 4 files changed, 240 insertions(+), 208 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index bd656a46340d..ed28bfcf1afb 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -18,6 +18,7 @@ package org.apache.cassandra.repair.autorepair; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.EnumMap; import java.util.HashSet; @@ -56,12 +57,17 @@ import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; /** * AutoRepair scheduler responsible for running different types of repairs. @@ -69,12 +75,11 @@ public class AutoRepair { private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); + private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @VisibleForTesting protected static Supplier timeFunc = Clock.Global::currentTimeMillis; - public static AutoRepair instance = new AutoRepair(); - // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra nodes with tiny amount of data. 
public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); @@ -95,6 +100,7 @@ public class AutoRepair protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; private boolean isSetupDone = false; + public static AutoRepair instance = new AutoRepair(); @VisibleForTesting protected AutoRepair() @@ -304,26 +310,29 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim if ((totalProcessedAssignments % config.getRepairThreads(repairType) == 0) || (totalProcessedAssignments == totalRepairAssignments)) { + boolean success = false; int retryCount = 0; Future f = null; while (retryCount <= config.getRepairMaxRetries()) { RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - Lists.newArrayList(curRepairAssignment.getTableNames()), - ranges, primaryRangeOnly); - repairState.resetWaitCondition(); + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); + RepairProgressListener listener = new RepairProgressListener(repairType); + task.addProgressListener(listener); f = repairRunnableExecutors.get(repairType).submit(task); try { long jobStartTime = timeFunc.get(); - repairState.waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + listener.await(); + success = listener.isSuccess(); soakAfterRepair(jobStartTime, config.getRepairTaskMinDuration().toMilliseconds()); } catch (InterruptedException e) { logger.error("Exception in cond await:", e); } - if (repairState.isSuccess()) + if (success) { break; } @@ -339,7 +348,7 @@ else if (retryCount < config.getRepairMaxRetries()) retryCount++; } //check repair status - if (repairState.isSuccess()) + if (success) { logger.info("Repair completed for range {}-{} for {} tables {}, total assignments: {}," + "processed assignments: {}", tokenRange.left, tokenRange.right, @@ -500,4 +509,57 @@ static class CollectedRepairStats int skippedTokenRanges = 0; int skippedTables = 
0; } + + @VisibleForTesting + protected static class RepairProgressListener implements ProgressListener + { + private final AutoRepairConfig.RepairType repairType; + @VisibleForTesting + protected boolean success; + @VisibleForTesting + protected final Condition condition = newOneTimeCondition(); + + public RepairProgressListener(AutoRepairConfig.RepairType repairType) + { + this.repairType = repairType; + } + + public void await() throws InterruptedException + { + //if for some reason we don't hear back on repair progress for sometime + if (!condition.await(12, TimeUnit.HOURS)) + { + success = false; + } + } + + public boolean isSuccess() + { + return success; + } + + @Override + public void progress(String tag, ProgressEvent event) + { + ProgressEventType type = event.getType(); + String message = String.format("[%s] %s", format.format(System.currentTimeMillis()), event.getMessage()); + if (type == ProgressEventType.ERROR) + { + logger.error("Repair failure for repair {}: {}", repairType.toString(), message); + success = false; + condition.signalAll(); + } + if (type == ProgressEventType.PROGRESS) + { + message = message + " (progress: " + (int) event.getProgressPercentage() + "%)"; + logger.debug("Repair progress for repair {}: {}", repairType.toString(), message); + } + if (type == ProgressEventType.COMPLETE) + { + logger.debug("Repair completed for repair {}: {}", repairType.toString(), message); + success = true; + condition.signalAll(); + } + } + } } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 972543784032..7266a28d1cc7 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -20,7 +20,6 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.db.ColumnFamilyStore; 
import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.view.TableViews; @@ -37,10 +36,6 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.concurrent.Condition; -import org.apache.cassandra.utils.progress.ProgressEvent; -import org.apache.cassandra.utils.progress.ProgressEventType; -import org.apache.cassandra.utils.progress.ProgressListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,13 +47,10 @@ import java.util.function.Supplier; import java.util.stream.Collectors; -import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; -import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; - /** * AutoRepairState represents the state of automated repair for a given repair type. */ -public abstract class AutoRepairState implements ProgressListener +public abstract class AutoRepairState { protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); @@ -93,10 +85,6 @@ public abstract class AutoRepairState implements ProgressListener protected int skippedTablesCount = 0; @VisibleForTesting protected AutoRepairHistory longestUnrepairedNode; - @VisibleForTesting - protected Condition condition = newOneTimeCondition(); - @VisibleForTesting - protected boolean success = true; protected final AutoRepairMetrics metrics; protected AutoRepairState(RepairType repairType) @@ -109,43 +97,8 @@ protected AutoRepairState(RepairType repairType) protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption options) { - RepairCoordinator task = new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), - options, keyspace); - task.addProgressListener(this); - - return task; - } - - @Override - public void progress(String 
tag, ProgressEvent event) - { - ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(currentTimeMillis()), event.getMessage()); - if (type == ProgressEventType.ERROR) - { - logger.error("Repair failure for {} repair: {}", repairType.toString(), message); - success = false; - condition.signalAll(); - } - if (type == ProgressEventType.PROGRESS) - { - message = message + " (progress: " + (int) event.getProgressPercentage() + "%)"; - logger.debug("Repair progress for {} repair: {}", repairType.toString(), message); - } - if (type == ProgressEventType.COMPLETE) - { - success = true; - condition.signalAll(); - } - } - - public void waitForRepairToComplete(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException - { - //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(repairSessionTimeout.toSeconds(), TimeUnit.SECONDS)) - { - success = false; - } + return new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), + options, keyspace); } public long getLastRepairTime() @@ -272,11 +225,6 @@ public int getSkippedTablesCount() return skippedTablesCount; } - public boolean isSuccess() - { - return success; - } - public void recordTurn(AutoRepairUtils.RepairTurn turn) { metrics.recordTurn(turn); @@ -291,11 +239,6 @@ public int getTotalDisabledTablesRepairCount() { return totalDisabledTablesRepairCount; } - - public void resetWaitCondition() - { - condition = newOneTimeCondition(); - } } class PreviewRepairedState extends AutoRepairState diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index cca5513e3712..a6ed14ff77ea 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ 
-24,8 +24,10 @@ import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import com.google.common.collect.Sets; @@ -34,6 +36,8 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.LocalStrategy; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.StorageService; @@ -57,10 +61,15 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; import static org.apache.cassandra.Util.setAutoRepairEnabled; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; @@ -74,6 +83,7 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -246,7 +256,6 @@ public void testRepairAsync() AutoRepair.instance.repairAsync(repairType); verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); - } @Test @@ 
-502,6 +511,10 @@ public void testMetrics() AutoRepair.instance.repairStates.put(repairType, autoRepairState); when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) .thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); when(autoRepairState.getFailedTokenRangesCount()).thenReturn(10); when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); @@ -514,29 +527,29 @@ public void testMetrics() } @Test - public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() throws Exception + public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() { AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - config.setMaterializedViewRepairEnabled(repairType, false); - config.setRepairRetryBackoff("0s"); - when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) - .thenReturn(repairRunnable); AutoRepair.instance.repairStates.put(repairType, autoRepairState); when(autoRepairState.getLastRepairTime()).thenReturn((long) 0); - AtomicInteger resetWaitConditionCalls = new AtomicInteger(); - AtomicInteger waitForRepairCompletedCalls = new AtomicInteger(); + AtomicInteger getRepairRunnableCalls = new AtomicInteger(); + AtomicReference prevListener = new AtomicReference<>(); doAnswer(invocation -> { - resetWaitConditionCalls.getAndIncrement(); - assertEquals("waitForRepairToComplete was called before resetWaitCondition", - resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get() + 1); - return null; - }).when(autoRepairState).resetWaitCondition(); + if (getRepairRunnableCalls.getAndIncrement() > 0) + { + // progress listener from previous repair should be signalled before 
starting new repair + assertTrue(prevListener.get().condition.isSignalled()); + } + getRepairRunnableCalls.incrementAndGet(); + return repairRunnable; + }).when(autoRepairState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); doAnswer(invocation -> { - waitForRepairCompletedCalls.getAndIncrement(); - assertEquals("resetWaitCondition was not called before waitForRepairToComplete", - resetWaitConditionCalls.get(), waitForRepairCompletedCalls.get()); + // sending out a COMPLETE event with a 10ms delay + Executors.newScheduledThreadPool(1).schedule(() -> { + invocation.getArgument(0, AutoRepair.RepairProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + }, 10, TimeUnit.MILLISECONDS); return null; - }).when(autoRepairState).waitForRepairToComplete(config.getRepairSessionTimeout(repairType)); + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepair.instance.repair(repairType); AutoRepair.instance.repair(repairType); @@ -632,7 +645,10 @@ public void testRepairTakesLastRepairTimeFromDB() public void testRepairMaxRetries() { when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(false); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); AtomicInteger sleepCalls = new AtomicInteger(); AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { @@ -646,10 +662,10 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); // Expect configured retries for each table expected to be repaired - assertEquals(config.getRepairMaxRetries() * expectedRepairAssignments, sleepCalls.get()); - verify(autoRepairState, 
Mockito.times(1)).setSucceededTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(expectedRepairAssignments); + assertEquals(config.getRepairMaxRetries()*expectedRepairAssignments, sleepCalls.get()); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(expectedRepairAssignments); } @Test @@ -664,20 +680,27 @@ public void testRepairSuccessAfterRetry() assertEquals(TimeUnit.SECONDS, unit); assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); }; - when(autoRepairState.isSuccess()).then((invocationOnMock) -> { - if (sleepCalls.get() == 0) { - return false; + doAnswer(invocation -> { + if (sleepCalls.get() == 0) + { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + } + else + { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); } - return true; - }); + + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); config.setRepairMinInterval(repairType, "0s"); + config.setRepairMaxRetries(1); AutoRepair.instance.repairStates.put(repairType, autoRepairState); AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); } @Test @@ -755,8 +778,11 @@ public void 
testRepairThrowsForIRWithCDCReplay() @Test public void testSoakAfterImmediateRepair() { - when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(true); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.repair_task_min_duration = new DurationSpec.LongSecondsBound("10s"); AtomicInteger sleepCalls = new AtomicInteger(); @@ -772,16 +798,19 @@ public void testSoakAfterImmediateRepair() AutoRepair.instance.repair(repairType); assertEquals(1, sleepCalls.get()); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); } @Test public void testNoSoakAfterRepair() { - when(autoRepairState.getRepairRunnable(any(), any(), any(), anyBoolean())).thenReturn(repairRunnable); - when(autoRepairState.isSuccess()).thenReturn(true); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); AutoRepairConfig 
config = AutoRepairService.instance.getAutoRepairConfig(); config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { @@ -792,8 +821,105 @@ public void testNoSoakAfterRepair() AutoRepair.instance.repair(repairType); - verify(autoRepairState, Mockito.times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); - verify(autoRepairState, Mockito.times(1)).setSkippedTokenRangesCount(0); - verify(autoRepairState, Mockito.times(1)).setFailedTokenRangesCount(0); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); + } + + @Test + public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() + { + RepairOption options = new RepairOption(RepairParallelism.PARALLEL, true, repairType == AutoRepairConfig.RepairType.INCREMENTAL, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Set.of(), + false, false, PreviewKind.NONE, false, true, false, false, false, false); + AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); + AutoRepairState spyState = spy(repairState); + AtomicReference failingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); + AtomicReference failingListener = new AtomicReference<>(); + AtomicReference succeedingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); + AtomicInteger repairRunableCalls = new AtomicInteger(); + doAnswer((InvocationOnMock inv ) -> { + RepairCoordinator runnable = spy(repairState.getRepairRunnable(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2), + inv.getArgument(3))); + if 
(repairRunableCalls.getAndIncrement() == 0) + { + // this will be used for first repair job + doAnswer(invocation -> { + // repair runnable for the first repair job will immediately fail + failingListener.set(invocation.getArgument(0, AutoRepair.RepairProgressListener.class)); + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(runnable).addProgressListener(Mockito.any()); + failingRepair.set(runnable); + } + else + { + // this will be used for subsequent repair jobs + doAnswer(invocation -> { + if (repairRunableCalls.get() > 0) + { + // repair runnable for the subsequent repair jobs will immediately complete + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + + } + // repair runnable for the first repair job will continue firing ERROR events + failingListener.get().progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(runnable).addProgressListener(Mockito.any()); + succeedingRepair.set(runnable); + } + return runnable; + }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); + when(spyState.getLastRepairTime()).thenReturn((long) 0); + AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(0); + AutoRepair.instance.repairStates.put(repairType, spyState); + + AutoRepair.instance.repair(repairType); + + assertEquals(1, (int) AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue()); + // only the first repair job should have failed despite it continuously firing ERROR events + verify(spyState, times(1)).setFailedTokenRangesCount(1); + } + + @Test + public void testProgressError() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0, "test")); + + 
assertFalse(listener.success); + assertTrue(listener.condition.isSignalled()); + } + + @Test + public void testProgressProgress() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.PROGRESS, 0, 0, "test")); + + assertFalse(listener.success); + assertFalse(listener.condition.isSignalled()); + } + + @Test + public void testProgresComplete() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); + + assertTrue(listener.success); + assertTrue(listener.condition.isSignalled()); + } + + @Test + public void testAwait() throws Exception + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); + + listener.await(); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index b4bb20c7eafd..13ca3d62d676 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -21,8 +21,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.TimeUnit; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -36,17 +34,13 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; import org.apache.cassandra.service.AutoRepairService; -import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.progress.ProgressEvent; -import 
org.apache.cassandra.utils.progress.ProgressEventType; import org.mockito.Mock; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; /** @@ -89,62 +83,7 @@ public void testGetRepairRunnable() } @Test - public void testProgressError() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); - - state.progress("test", progressEvent); - - assertFalse(state.success); - assertTrue(state.condition.await(0, TimeUnit.MILLISECONDS)); - } - - @Test - public void testProgress_progress() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.PROGRESS); - - state.progress("test", progressEvent); - - assertTrue(state.success); - assertFalse(state.condition.await(0, TimeUnit.MILLISECONDS)); - } - - - @Test - public void testProgress_complete() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.COMPLETE); - - state.progress("test", progressEvent); - - assertTrue(state.success); - assertTrue(state.condition.await(1, TimeUnit.MILLISECONDS)); - } - - @Test - public void testWaitForRepairToComplete() throws Exception - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.condition.signalAll(); - Condition finishedCondition = Condition.newOneTimeCondition(); - Callable waitForRepairToComplete = () -> { - state.waitForRepairToComplete(new DurationSpec.IntSecondsBound("12h")); - finishedCondition.signalAll(); - return null; - }; - - waitForRepairToComplete.call(); - - 
assertTrue(finishedCondition.await(1, TimeUnit.MILLISECONDS)); - } - - @Test - public void testGetLastRepairTime() - { + public void testGetLastRepairTime() { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.lastRepairTimeInMs = 1; @@ -372,42 +311,4 @@ public void testGetFailedTokenRangesCount() assertEquals(1, state.getFailedTokenRangesCount()); } - - @Test - public void isSuccess() - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.success = true; - - assertTrue(state.isSuccess()); - - state.success = false; - - assertFalse(state.isSuccess()); - } - - @Test - public void testWaitForRepairToCompleteDoesNotSetSuccessWhenProgressReceivesError() throws InterruptedException - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - when(progressEvent.getType()).thenReturn(ProgressEventType.ERROR); - - state.progress("test", progressEvent); - assertFalse(state.success); - - state.waitForRepairToComplete(new DurationSpec.IntSecondsBound("12h")); - assertFalse(state.success); - } - - @Test - public void testResetWaitCondition() - { - AutoRepairState state = RepairType.getAutoRepairState(repairType); - state.condition.signalAll(); - assertTrue(state.condition.isSignalled()); - - state.resetWaitCondition(); - - assertFalse(state.condition.isSignalled()); - } } From f6dbf1c7f949060c084f5811c08181f1aa498b78 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Mon, 24 Feb 2025 13:46:54 -0800 Subject: [PATCH 208/257] Address comments --- .../repair/autorepair/AutoRepairParameterizedTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a6ed14ff77ea..ca5f0d50577c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -830,8 +831,8 @@ public void testNoSoakAfterRepair() public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() { RepairOption options = new RepairOption(RepairParallelism.PARALLEL, true, repairType == AutoRepairConfig.RepairType.INCREMENTAL, false, - AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Set.of(), - false, false, PreviewKind.NONE, false, true, false, false, false, false); + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Collections.emptySet(), + false, false, PreviewKind.NONE, false, true, false, false, false, false); AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); AutoRepairState spyState = spy(repairState); AtomicReference failingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); From d2aaf4047972df14861857134d04d56bf8ba3e29 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Mon, 24 Feb 2025 13:52:21 -0800 Subject: [PATCH 209/257] Address comments --- .../apache/cassandra/repair/autorepair/AutoRepair.java | 8 ++++---- .../repair/autorepair/AutoRepairParameterizedTest.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index ed28bfcf1afb..6a92bbe28fe1 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -324,7 +324,7 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim try { long jobStartTime = 
timeFunc.get(); - listener.await(); + listener.await(config.getRepairSessionTimeout(repairType)); success = listener.isSuccess(); soakAfterRepair(jobStartTime, config.getRepairTaskMinDuration().toMilliseconds()); } @@ -524,10 +524,10 @@ public RepairProgressListener(AutoRepairConfig.RepairType repairType) this.repairType = repairType; } - public void await() throws InterruptedException + public void await(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException { //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(12, TimeUnit.HOURS)) + if (!condition.await(repairSessionTimeout.to(TimeUnit.SECONDS), TimeUnit.SECONDS)) { success = false; } @@ -542,7 +542,7 @@ public boolean isSuccess() public void progress(String tag, ProgressEvent event) { ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(System.currentTimeMillis()), event.getMessage()); + String message = String.format("[%s] %s", format.format(Clock.Global.currentTimeMillis()), event.getMessage()); if (type == ProgressEventType.ERROR) { logger.error("Repair failure for repair {}: {}", repairType.toString(), message); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index ca5f0d50577c..31541f589792 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -921,6 +921,6 @@ public void testAwait() throws Exception AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); - listener.await(); + listener.await(new DurationSpec.IntSecondsBound("12h")); } } From d30e50e405e7f2238dc5fe14d81e904c99f52e7e Mon Sep 17 00:00:00 2001 From: 
Kristijonas Zalys Date: Mon, 24 Feb 2025 15:20:30 -0800 Subject: [PATCH 210/257] Fix formatting --- .../org/apache/cassandra/repair/autorepair/AutoRepair.java | 2 +- .../cassandra/repair/autorepair/AutoRepairStateTest.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 6a92bbe28fe1..6d392470f210 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -527,7 +527,7 @@ public RepairProgressListener(AutoRepairConfig.RepairType repairType) public void await(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException { //if for some reason we don't hear back on repair progress for sometime - if (!condition.await(repairSessionTimeout.to(TimeUnit.SECONDS), TimeUnit.SECONDS)) + if (!condition.await(repairSessionTimeout.toSeconds(), TimeUnit.SECONDS)) { success = false; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index 13ca3d62d676..fc4ac2f03c79 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -83,7 +83,8 @@ public void testGetRepairRunnable() } @Test - public void testGetLastRepairTime() { + public void testGetLastRepairTime() + { AutoRepairState state = RepairType.getAutoRepairState(repairType); state.lastRepairTimeInMs = 1; From 58cdd1c1d8d82421359bccf8f37dbb9809867301 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 3 Mar 2025 09:26:27 -0600 Subject: [PATCH 211/257] Account for vnodes with repair_by_keyspace in RepairTokenRangeSplitter Updates RepairTokenRangeSplitter to call getRepairAssignmentsForKeyspace for each 
individual token range so repair_by_keyspace grouping works correctly. Patch by Andy Tolbert; Reviewed by Jaydeepkumar Chovatia for CASSANDRA-20392 --- .../autorepair/RepairTokenRangeSplitter.java | 46 ++++---- .../AutoRepairParameterizedTest.java | 48 ++++----- .../RepairTokenRangeSplitterTest.java | 102 +++++++++++++----- 3 files changed, 124 insertions(+), 72 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index b6dcfc61b4ae..a76cf3301d03 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -121,7 +121,7 @@ *

      • * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to - * reduce this value if the impact of repairs is causing too many load on the cluster, or increase it if + * reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up * the min_repair_interval. *
      • @@ -294,8 +294,16 @@ protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repair return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), Collections.emptyList()); } - Collection> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); - List repairAssignments = getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRanges); + List> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); + // shuffle token ranges to unbias selection of ranges + Collections.shuffle(tokenRanges); + List repairAssignments = new ArrayList<>(); + // Generate assignments for each range speparately + for (Range tokenRange : tokenRanges) + { + repairAssignments.addAll(getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRange)); + } + FilteredRepairAssignments filteredRepairAssignments = filterRepairAssignments(priority, repairPlan.getKeyspaceName(), repairAssignments, bytesSoFar); bytesSoFar = filteredRepairAssignments.newBytesSoFar; return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), filteredRepairAssignments.repairAssignments); @@ -303,7 +311,7 @@ protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repair } @VisibleForTesting - List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Collection> tokenRanges) + List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Range tokenRange) { List repairAssignments = new ArrayList<>(); // this is used for batching minimal single assignment tables together @@ -313,9 +321,10 @@ List getRepairAssignmentsForKeyspace(AutoRepairConfig.Rep // If we can repair by keyspace, sort the tables by size so can batch the smallest ones together boolean repairByKeyspace = 
config.getRepairByKeyspace(repairType); + List tablesToProcess = tableNames; if (repairByKeyspace) { - tableNames.sort((t1, t2) -> { + tablesToProcess = tableNames.stream().sorted((t1, t2) -> { ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); // If for whatever reason the CFS is not retrievable, we can assume it has been deleted, so give the @@ -331,12 +340,12 @@ else if (cfs2 == null) return 1; } return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); - }); + }).toList(); } - for (String tableName : tableNames) + for (String tableName : tablesToProcess) { - List tableAssignments = getRepairAssignmentsForTable(keyspaceName, tableName, tokenRanges); + List tableAssignments = getRepairAssignmentsForTable(keyspaceName, tableName, tokenRange); if (tableAssignments.isEmpty()) continue; @@ -519,9 +528,9 @@ static SizedRepairAssignment merge(List assignments) } @VisibleForTesting - protected List getRepairAssignmentsForTable(String keyspaceName, String tableName, Collection> tokenRanges) + protected List getRepairAssignmentsForTable(String keyspaceName, String tableName, Range tokenRange) { - List sizeEstimates = getRangeSizeEstimate(keyspaceName, tableName, tokenRanges); + List sizeEstimates = getRangeSizeEstimate(keyspaceName, tableName, tokenRange); return getRepairAssignments(sizeEstimates); } @@ -632,7 +641,7 @@ private int calculateNumberOfSplits(SizeEstimate estimate) return splits; } - private Collection> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) + private List> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) { // Collect all applicable token ranges Collection> wrappedRanges; @@ -654,18 +663,15 @@ private Collection> getTokenRanges(boolean primaryRangeOnly, String return ranges; } - private List getRangeSizeEstimate(String keyspace, String table, Collection> tokenRanges) + private 
List getRangeSizeEstimate(String keyspace, String table, Range tokenRange) { List sizeEstimates = new ArrayList<>(); - for (Range tokenRange : tokenRanges) + logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); + try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) { - logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); - try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) - { - SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); - logger.debug("Generated size estimate {}", estimate); - sizeEstimates.add(estimate); - } + SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); + logger.debug("Generated size estimate {}", estimate); + sizeEstimates.add(estimate); } return sizeEstimates; } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 31541f589792..a5d2c7741950 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -133,10 +133,9 @@ public static void setupClass() throws Exception StorageService.instance.doAutoRepairSetup(); DatabaseDescriptor.setCDCEnabled(false); - // Calculate the expected number of tables to be repaired, this should be all system keyspaces that are - // distributed, plus 1 for the table we created (ks.tbl), excluding the 'mv' materialized view and - // 'tbl_disabled_auto_repair' we created. - int expectedTablesGoingThroughRepair = 0; + // Calculate the expected number of keyspaces to be repaired, this should be all system keyspaces that are + // distributed, plus 1 for the table we created (ks.tbl). 
+ int expectedKeyspacesGoingThroughRepair = 0; for (Keyspace keyspace : Keyspace.all()) { // skip LocalStrategy keyspaces as these aren't repaired. @@ -150,11 +149,10 @@ public static void setupClass() throws Exception continue; } - int expectedTables = keyspace.getName().equals("ks") ? 1 : keyspace.getColumnFamilyStores().size(); - expectedTablesGoingThroughRepair += expectedTables; + expectedKeyspacesGoingThroughRepair += 1; } // Since the splitter will unwrap a full token range, we expect twice as many repairs. - expectedRepairAssignments = expectedTablesGoingThroughRepair * 2; + expectedRepairAssignments = expectedKeyspacesGoingThroughRepair * 2; } @Before @@ -170,7 +168,8 @@ public void setup() DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - MockitoAnnotations.initMocks(this); + //noinspection resource + MockitoAnnotations.openMocks(this); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).disableAutoCompaction(); @@ -263,7 +262,7 @@ public void testRepairAsync() public void testRepairTurn() { UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + Assert.assertNotEquals("Expected my turn for the repair", NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); } @Test @@ -288,8 +287,8 @@ public void testTooFrequentRepairs() AutoRepair.instance.repair(repairType); long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); - Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), - consideredTables, 0); + Assert.assertNotEquals(String.format("Expected 
total repaired tables > 0, actual value %s ", consideredTables), + 0, consideredTables); //if repair was done in last 24 hours then it should not trigger another repair config.setRepairMinInterval(repairType, "24h"); @@ -313,12 +312,12 @@ public void testNonFrequentRepairs() Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", lastRepairTime1), lastRepairTime1 > 0); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", - AutoRepairUtils.myTurnToRunRepair(repairType, myId) != NOT_MY_TURN); + Assert.assertNotEquals("Expected my turn for the repair", + NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); AutoRepair.instance.repair(repairType); long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); Assert.assertNotSame(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + - "lastRepairTime2 ", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @@ -330,16 +329,15 @@ public void testGetPriorityHosts() AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); long prevCount = state.getTotalMVTablesConsideredForRepair(); AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); - Assert.assertSame(String.format("Priority host count is not same, actual value %d, expected value %d", - AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + Assert.assertEquals(String.format("Priority host count is not same, actual value %d, expected value %d", 
+ AutoRepairUtils.getPriorityHosts(repairType).size(), 0), 0, AutoRepairUtils.getPriorityHosts(repairType).size()); UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); - Assert.assertTrue("Expected my turn for the repair", AutoRepairUtils.myTurnToRunRepair(repairType, myId) != - NOT_MY_TURN); + Assert.assertNotEquals("Expected my turn for the repair", NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); AutoRepair.instance.repair(repairType); AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); AutoRepair.instance.repair(repairType); - Assert.assertSame(String.format("Priority host count is not same actual value %d, expected value %d", - AutoRepairUtils.getPriorityHosts(repairType).size(), 0), AutoRepairUtils.getPriorityHosts(repairType).size(), 0); + Assert.assertEquals(String.format("Priority host count is not same actual value %d, expected value %d", + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), 0, AutoRepairUtils.getPriorityHosts(repairType).size()); assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); } @@ -388,7 +386,7 @@ public void testGetAllMVs() config.setMaterializedViewRepairEnabled(repairType, true); assertTrue(config.getMaterializedViewRepairEnabled(repairType)); - assertEquals(Arrays.asList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + assertEquals(List.of(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); config.setMaterializedViewRepairEnabled(repairType, false); } @@ -571,7 +569,7 @@ public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() AutoRepair.instance.repair(repairType); int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); Assert.assertNotSame(String.format("Expected total 
repaired tables > 0, actual value %s ", consideredTables), - consideredTables, 0); + 0, consideredTables); int disabledTablesRepairCountAfter = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); Assert.assertTrue(String.format("A table %s should be skipped from auto repair, expected value: %d, actual value %d ", TABLE_DISABLED_AUTO_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); @@ -662,7 +660,7 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); - // Expect configured retries for each table expected to be repaired + // Expect configured retries for each keyspace expected to be repaired assertEquals(config.getRepairMaxRetries()*expectedRepairAssignments, sleepCalls.get()); verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); @@ -835,9 +833,7 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() false, false, PreviewKind.NONE, false, true, false, false, false, false); AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); AutoRepairState spyState = spy(repairState); - AtomicReference failingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); AtomicReference failingListener = new AtomicReference<>(); - AtomicReference succeedingRepair = new AtomicReference<>(new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), options, keyspace.getName())); AtomicInteger repairRunableCalls = new AtomicInteger(); doAnswer((InvocationOnMock inv ) -> { RepairCoordinator runnable = spy(repairState.getRepairRunnable(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2), @@ -851,7 +847,6 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() 
invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); return null; }).when(runnable).addProgressListener(Mockito.any()); - failingRepair.set(runnable); } else { @@ -867,7 +862,6 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() failingListener.get().progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); return null; }).when(runnable).addProgressListener(Mockito.any()); - succeedingRepair.set(runnable); } return runnable; }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index deea6a33f1a3..d5c6114af5a1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -34,11 +34,11 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.config.DataStorageSpec.LongMebibytesBound; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.format.SSTableReader; @@ -55,6 +55,8 @@ import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.BYTES_PER_ASSIGNMENT; import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.MAX_TABLES_PER_ASSIGNMENT; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** @@ -87,6 +89,7 @@ 
public static void setUpClass() @Before public void setUp() { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); @@ -102,7 +105,7 @@ public void setUp() } @Test - public void testSizePartitionCount() throws Throwable + public void testSizePartitionCount() { insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); try (Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE)) @@ -114,7 +117,7 @@ public void testSizePartitionCount() throws Throwable } @Test - public void testSizePartitionCountSplit() throws Throwable + public void testSizePartitionCountSplit() { int partitionCount = 100_000; int[] values = new int[partitionCount]; @@ -143,28 +146,25 @@ public void testSizePartitionCountSplit() throws Throwable public void testGetRepairAssignmentsForTable_NoSSTables() { // Should return 1 assignment if there are no SSTables - Collection> ranges = Collections.singleton(new Range<>(Murmur3Partitioner.instance.getMinimumToken(), Murmur3Partitioner.instance.getMaximumToken())); - List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, FULL_RANGE); assertEquals(1, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_Single() throws Throwable + public void testGetRepairAssignmentsForTable_Single() { - Collection> ranges = Collections.singleton(new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken())); insertAndFlushSingleTable(); - List assignments = 
repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, FULL_RANGE); assertEquals(1, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable + public void testGetRepairAssignmentsForTable_BatchingTables() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); // We expect two assignments, one with table1 and table2 batched, and one with table3 assertEquals(2, assignments.size()); @@ -173,13 +173,12 @@ public void testGetRepairAssignmentsForTable_BatchingTables() throws Throwable } @Test - public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable + public void testGetRepairAssignmentsForTable_BatchSize() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(2); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); // We expect one assignment, with two tables batched assertEquals(1, assignments.size()); @@ -187,25 +186,23 @@ public void testGetRepairAssignmentsForTable_BatchSize() throws Throwable } @Test - public void testGetRepairAssignmentsForTable_NoBatching() throws Throwable + public void 
testGetRepairAssignmentsForTable_NoBatching() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "1")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(3); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); assertEquals(3, assignments.size()); } @Test - public void testGetRepairAssignmentsForTable_AllBatched() throws Throwable + public void testGetRepairAssignmentsForTable_AllBatched() { repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "100")); - Collection> ranges = Collections.singleton(FULL_RANGE); List tableNames = createAndInsertTables(5); - List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, ranges); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); assertEquals(1, assignments.size()); } @@ -333,6 +330,62 @@ public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxByte assertEquals(expectedBytes * 2, filteredRepairAssignments.newBytesSoFar); } + @Test + public void testTokenRangesRepairByKeyspace() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); + + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); + + Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); + + // should be only 1 entry for the keyspace. 
+ assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // Should only be two assignments (since single node encompasses the whole range, should get 2 primary ranges) + // to account for the range wrapping the ring. + assertEquals(2, assignments.size()); + + for (RepairAssignment assignment : assignments) + { + assertEquals(AuthKeyspace.TABLE_NAMES.size(), assignment.getTableNames().size()); + } + } + + @Test + public void testTokenRangesRepairByKeyspaceFalse() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, false); + + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); + + Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // Should be two ranges * X system_auth table names assignments + assertEquals(2 * AuthKeyspace.TABLE_NAMES.size(), assignments.size()); + + // each assignment should only include one table. 
+ for (RepairAssignment assignment : assignments) + { + assertEquals(1, assignment.getTableNames().size()); + } + } + @Test(expected = IllegalArgumentException.class) public void testSetParameterShouldNotAllowUnknownParameter() { @@ -377,13 +430,13 @@ private SizeEstimate sizeEstimateByBytes(LongMebibytesBound sizeInRange, LongMeb return new SizeEstimate(RepairType.INCREMENTAL, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); } - private void insertAndFlushSingleTable() throws Throwable + private void insertAndFlushSingleTable() { execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); flush(); } - private List createAndInsertTables(int count) throws Throwable + private List createAndInsertTables(int count) { List tableNames = new ArrayList<>(); for (int i = 0; i < count; i++) @@ -395,13 +448,12 @@ private List createAndInsertTables(int count) throws Throwable return tableNames; } - private void insertAndFlushTable(String tableName) throws Throwable + private void insertAndFlushTable(String tableName) { insertAndFlushTable(tableName, 1); - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); } - private void insertAndFlushTable(String tableName, int... vals) throws Throwable + private void insertAndFlushTable(String tableName, int... 
vals) { for (int i : vals) { From 0161f3a381418604133e6366ac237f97742e1657 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:25:16 -0600 Subject: [PATCH 212/257] Make work with JDK11 (and JDK8) --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- .../repair/autorepair/AutoRepairParameterizedTest.java | 2 +- .../repair/autorepair/RepairTokenRangeSplitterTest.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index a76cf3301d03..d746d81b71f6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -340,7 +340,7 @@ else if (cfs2 == null) return 1; } return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); - }).toList(); + }).collect(Collectors.toList()); } for (String tableName : tablesToProcess) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index a5d2c7741950..faa70e3d451f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -386,7 +386,7 @@ public void testGetAllMVs() config.setMaterializedViewRepairEnabled(repairType, true); assertTrue(config.getMaterializedViewRepairEnabled(repairType)); - assertEquals(List.of(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + assertEquals(Collections.singletonList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); config.setMaterializedViewRepairEnabled(repairType, false); } diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index d5c6114af5a1..f727a3a2c44b 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -335,7 +335,7 @@ public void testTokenRangesRepairByKeyspace() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); - final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", new ArrayList<>(AuthKeyspace.TABLE_NAMES)); final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); @@ -363,7 +363,7 @@ public void testTokenRangesRepairByKeyspaceFalse() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, false); - final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", AuthKeyspace.TABLE_NAMES.stream().toList()); + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", new ArrayList<>(AuthKeyspace.TABLE_NAMES)); final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); From 603b77fbcc9aabe0ef15cbad1b7ad934b2afa8db Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 3 Mar 2025 15:47:20 -0800 Subject: [PATCH 213/257] Adjust as per latest trunk as of 03-Mar-2025 From 4dd153287ce845904e7d1304b52f9ce157450dae Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 3 Mar 2025 16:11:46 -0800 Subject: [PATCH 214/257] Type & 
Remove whitespace From e6846ae7a2765f564cad293d7ddc43bf48a40263 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 12:19:57 -0800 Subject: [PATCH 215/257] Ensure the main config obj has been initialized before checking the yaml property --- src/java/org/apache/cassandra/schema/SchemaKeyspace.java | 2 +- src/java/org/apache/cassandra/schema/TableParams.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 7781ef29b9e6..69b43555c42a 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -622,7 +622,7 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui // As above, only add the auto_repair column if the scheduler is enabled // to avoid RTE in pre-5.1 versioned node during upgrades - if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.add("auto_repair", params.autoRepair.asMap()); } diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index c56ddd4ab41f..9c870d2a0b59 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -421,7 +421,7 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) } builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); - if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.newLine() .append("AND auto_repair = ").append(autoRepair.asMap()); From 
7a3475fe56e22469942f49322fb2164a830eaec0 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 13:59:01 -0800 Subject: [PATCH 216/257] Use the last repair stat to confirm that the repair ran indeed instead of sleep to avoid flaky test --- .../repair/autorepair/AutoRepair.java | 2 +- .../test/repair/AutoRepairSchedulerTest.java | 57 +++++++++++++------ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 6d392470f210..add579851cf0 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -89,7 +89,7 @@ public class AutoRepair protected final Map repairRunnableExecutors; @VisibleForTesting - protected final Map repairStates; + public final Map repairStates; @VisibleForTesting // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index c6fa37f54dd7..a2492858654e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -21,13 +21,13 @@ import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.concurrent.TimeUnit; import java.util.UUID; import com.google.common.collect.ImmutableMap; -import com.google.common.util.concurrent.Uninterruptibles; + import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.schema.SystemDistributedKeyspace; + import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -63,19 +63,19 @@ public static 
void init() throws IOException ImmutableMap.of( "repair_type_overrides", ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s"), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "1", + "parallel_repair_percentage", "0", + "min_repair_interval", "1s"), AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s")))) + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "1", + "parallel_repair_percentage", "0", + "min_repair_interval", "1s")))) .set("auto_repair.enabled", "true") .set("auto_repair.global_settings.repair_by_keyspace", "true") .set("auto_repair.repair_task_min_duration", "0s") @@ -106,9 +106,34 @@ public void testScheduler() throws ParseException throw new RuntimeException(e); } })); - // wait for a couple of minutes for repair to go through on all three nodes - Uninterruptibles.sleepUninterruptibly(2, TimeUnit.MINUTES); + // validate that the repair ran on all nodes + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + while(true) + { + if (AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL).getLastRepairTime() == 0 + || AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL).getLastRepairTime() == 0) + { + try + { + Thread.sleep(5000); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + continue; + } + break; + } + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); validate(AutoRepairConfig.RepairType.FULL.toString()); validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); } From 
0b759a88e087359ccd2f6da34ef08ded6a1d25f3 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:29:09 -0800 Subject: [PATCH 217/257] Incorporate the code review comments --- .../cassandra/schema/SchemaKeyspace.java | 3 ++- .../apache/cassandra/schema/TableParams.java | 3 ++- .../test/repair/AutoRepairSchedulerTest.java | 25 ++++++------------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 69b43555c42a..fe98cfa0ec1a 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -622,7 +622,8 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui // As above, only add the auto_repair column if the scheduler is enabled // to avoid RTE in pre-5.1 versioned node during upgrades - if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null + && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.add("auto_repair", params.autoRepair.asMap()); } diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 9c870d2a0b59..2fe01b78470a 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -421,7 +421,8 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) } builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); - if (DatabaseDescriptor.getRawConfig() != null && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + if (DatabaseDescriptor.getRawConfig() != null + && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) { builder.newLine() .append("AND 
auto_repair = ").append(autoRepair.asMap()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index a2492858654e..9f0690123cf7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -22,9 +22,11 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.UUID; +import java.util.concurrent.TimeUnit; import com.google.common.collect.ImmutableMap; +import org.apache.cassandra.Util; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.schema.SystemDistributedKeyspace; @@ -40,6 +42,7 @@ import org.apache.cassandra.service.AutoRepairService; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; +import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; /** @@ -111,23 +114,11 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { - while(true) - { - if (AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL).getLastRepairTime() == 0 - || AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL).getLastRepairTime() == 0) - { - try - { - Thread.sleep(5000); - } - catch (InterruptedException e) - { - throw new RuntimeException(e); - } - continue; - } - break; - } + Util.spinAssert("AutoRepair has not yet completed one repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); } catch (Exception e) { From a929963a1e03fe994ab8cc2661f65b11ddb27790 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:34:54 -0800 Subject: [PATCH 218/257] Incorporate the code review 
comments --- .../distributed/test/repair/AutoRepairSchedulerTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 9f0690123cf7..326029392395 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -114,11 +114,16 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { - Util.spinAssert("AutoRepair has not yet completed one repair cycle", + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", greaterThan(0L), AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, 5, TimeUnit.MINUTES); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); } catch (Exception e) { From 5ca4502fa701f730a638af692b0210f65e645abb Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:37:26 -0800 Subject: [PATCH 219/257] Group all public statements together --- .../org/apache/cassandra/repair/autorepair/AutoRepair.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index add579851cf0..07fb4793c51e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -83,14 +83,14 @@ public class AutoRepair // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra 
nodes with tiny amount of data. public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); + @VisibleForTesting + public final Map repairStates; + @VisibleForTesting protected final Map repairExecutors; protected final Map repairRunnableExecutors; - @VisibleForTesting - public final Map repairStates; - @VisibleForTesting // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. From 8081ef3f9fa38ecf034a1fb287e4fed61767fbb8 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 6 Mar 2025 14:41:44 -0800 Subject: [PATCH 220/257] No need for try/catch in the test case --- .../test/repair/AutoRepairSchedulerTest.java | 27 +++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 326029392395..c7b7448d1a24 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -112,23 +112,16 @@ public void testScheduler() throws ParseException // validate that the repair ran on all nodes cluster.forEach(i -> i.runOnInstance(() -> { - try - { - Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", - greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, - 5, - TimeUnit.MINUTES); - Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", - greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, - 5, - TimeUnit.MINUTES); - } - catch (Exception e) - 
{ - throw new RuntimeException(e); - } + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + greaterThan(0L), + AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, + 5, + TimeUnit.MINUTES); })); validate(AutoRepairConfig.RepairType.FULL.toString()); validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); From 5b7ab533f685184f18b4f78902010b3c22c36381 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 7 Mar 2025 14:42:25 -0800 Subject: [PATCH 221/257] Avoid micros splitting of token ranges for FixedSplitTokenRangeSplitter --- .../autorepair/FixedSplitTokenRangeSplitter.java | 4 +++- .../autorepair/FixedSplitTokenRangeSplitterTest.java | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index e27b8188bd2f..f8b28fa6001e 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -102,9 +102,11 @@ private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primar boolean byKeyspace = config.getRepairByKeyspace(repairType); // collect all token ranges. 
List> allRanges = new ArrayList<>(); + // this is done to avoid micro splits in the case of vnodes + int splitsPerRange = Math.max(1, numberOfSubranges / tokens.size()); for (Range token : tokens) { - allRanges.addAll(split(token, numberOfSubranges)); + allRanges.addAll(split(token, splitsPerRange)); } if (byKeyspace) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 6326f55f9d5c..67deafac71b6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -61,8 +61,11 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; + private static final int numberOfSubRanges = 4; - private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(4)); + private static final int totalTokenRanges = 3; + private static int numberOfSplits; + private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges)); @Parameterized.Parameter() public AutoRepairConfig.RepairType repairType; @@ -96,16 +99,16 @@ public static void setupClass() throws Exception SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + + numberOfSplits = numberOfSubRanges/totalTokenRanges; } @Test public void testTokenRangesSplitByTable() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); - int totalTokenRanges = 3; Collection> tokens = 
StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); - int numberOfSplits = 4; List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (int i = 0; i < tables.size(); i++) @@ -156,10 +159,8 @@ public void testTokenRangesSplitByTable() public void testTokenRangesSplitByKeyspace() { AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); - int totalTokenRanges = 3; Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(totalTokenRanges, tokens.size()); - int numberOfSplits = 4; List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (Range range : tokens) @@ -195,7 +196,6 @@ public void testTokenRangesSplitByKeyspace() public void testTokenRangesNoSplitByDefault() { Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - int totalTokenRanges = 3; assertEquals(totalTokenRanges, tokens.size()); List> expectedToken = new ArrayList<>(tokens); From 2f71ca4ec8e00d8a6a64c297dff3aee186f0bba4 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 7 Mar 2025 18:07:00 -0800 Subject: [PATCH 222/257] Make the number_of_subranges parameterized in test case --- .../FixedSplitTokenRangeSplitterTest.java | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index 67deafac71b6..ebe3fe673561 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -26,8 +26,8 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.Map; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; 
import org.junit.runner.RunWith; @@ -61,19 +61,27 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; - private static final int numberOfSubRanges = 4; - private static final int totalTokenRanges = 3; private static int numberOfSplits; - private static final Map splitterParams = Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges)); - @Parameterized.Parameter() + @Parameterized.Parameter(0) public AutoRepairConfig.RepairType repairType; - @Parameterized.Parameters(name = "repairType={0}") - public static Collection repairTypes() + @Parameterized.Parameter(1) + public int numberOfSubRange; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRange={1}") + public static Collection parameters() { - return Arrays.asList(AutoRepairConfig.RepairType.values()); + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + { + for (int subRange : Arrays.asList(4, 16, 64, 128)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; } @BeforeClass @@ -99,8 +107,12 @@ public static void setupClass() throws Exception SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + } - numberOfSplits = numberOfSubRanges/totalTokenRanges; + @Before + public void before() + { + numberOfSplits = numberOfSubRange / totalTokenRanges; } @Test @@ -121,7 +133,7 @@ public void testTokenRangesSplitByTable() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, splitterParams) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, 
Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -130,7 +142,7 @@ public void testTokenRangesSplitByTable() assertFalse(keyspaceAssignments.hasNext()); List assignments = keyspace.getRepairAssignments(); - assertEquals(totalTokenRanges*numberOfSplits*tables.size(), assignments.size()); + assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); int expectedTableIndex = -1; @@ -144,9 +156,9 @@ public void testTokenRangesSplitByTable() expectedTableIndex = -1; // should be a set of ranges for each table. - for (int i = 0; i plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, splitterParams) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -181,11 +193,11 @@ public void testTokenRangesSplitByKeyspace() List assignments = keyspace.getRepairAssignments(); assertNotNull(assignments); - assertEquals(totalTokenRanges*numberOfSplits, assignments.size()); + assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); // should only be one set of ranges for the entire keyspace. 
- for (int i = 0; i Date: Sun, 9 Mar 2025 13:51:09 -0700 Subject: [PATCH 223/257] Review comments from Andy --- .../FixedSplitTokenRangeSplitter.java | 13 +- .../FixedSplitTokenRangeSplitterTest.java | 121 +++++++++--------- 2 files changed, 75 insertions(+), 59 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index f8b28fa6001e..e571b025a271 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -45,6 +45,17 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt { private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); + /** + * Selecting the default value is tricky. If we select a small number then individual repair would be heavy, + * on the other hand, if we select large then too many repair sessions would be created. + *

        + * The default 64 is a good number for non v-nodes, but with v-nodes it can result in too many repair sessions + * because the number of repair sessions on a node = number_of_subranges * num_token. + *

        + * To keep it balances, 32 serves a good default that would cater to both v-nodes and non v-nodes. + */ + public static final int DEFAULT_SUBRANGES = 32; + /** * The number of subranges to split each to-be-repaired token range into. Defaults to 1. *

        @@ -69,7 +80,7 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< { this.repairType = repairType; - numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, "1")); + numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_SUBRANGES))); } @Override diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java index ebe3fe673561..c617771c88b1 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java @@ -22,12 +22,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.TreeSet; -import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; @@ -36,16 +35,18 @@ import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.BootStrapper; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.sai.disk.format.Version; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; +import static 
org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_SUBRANGES; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -61,24 +62,30 @@ public class FixedSplitTokenRangeSplitterTest private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; - private static final int totalTokenRanges = 3; - private static int numberOfSplits; @Parameterized.Parameter(0) public AutoRepairConfig.RepairType repairType; @Parameterized.Parameter(1) - public int numberOfSubRange; + public int numberOfSubRanges; - @Parameterized.Parameters(name = "repairType={0}, numberOfSubRange={1}") + @Parameterized.Parameter(2) + public int numTokens; + + public static final int curTokenRange = 1; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}, numTokens={2}") public static Collection parameters() { List params = new ArrayList<>(); for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) { - for (int subRange : Arrays.asList(4, 16, 64, 128)) + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) { - params.add(new Object[]{ type, subRange }); + for (int numToken : Arrays.asList(curTokenRange)) + { + params.add(new Object[]{ type, subRange, numToken }); + } } } return params; @@ -92,15 +99,8 @@ public static void setupClass() throws Exception DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); ServerTestUtils.prepareServerNoRegister(); - Token t1 = new Murmur3Partitioner.LongToken(-9223372036854775808L); - Token t2 = new Murmur3Partitioner.LongToken(-3074457345618258603L); - Token t3 = new Murmur3Partitioner.LongToken(3074457345618258602L); - Set tokens = new HashSet<>(); - tokens.add(t1); - tokens.add(t2); - tokens.add(t3); - - ServerTestUtils.registerLocal(tokens); + Set tokens1 = 
BootStrapper.getRandomTokens(ClusterMetadata.current(), curTokenRange); + ServerTestUtils.registerLocal(tokens1); // Ensure that the on-disk format statics are loaded before the test run Version.LATEST.onDiskFormat(); StorageService.instance.doAutoRepairSetup(); @@ -109,18 +109,13 @@ public static void setupClass() throws Exception QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); } - @Before - public void before() - { - numberOfSplits = numberOfSubRange / totalTokenRanges; - } - @Test public void testTokenRangesSplitByTable() { + int numberOfSplits = calcSplits(numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(totalTokenRanges, tokens.size()); + assertEquals(numTokens, tokens.size()); List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (int i = 0; i < tables.size(); i++) @@ -133,7 +128,7 @@ public void testTokenRangesSplitByTable() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. 
@@ -142,37 +137,31 @@ public void testTokenRangesSplitByTable() assertFalse(keyspaceAssignments.hasNext()); List assignments = keyspace.getRepairAssignments(); - assertEquals(totalTokenRanges * numberOfSplits * tables.size(), assignments.size()); + assertEquals(numTokens * numberOfSplits * tables.size(), assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - int expectedTableIndex = -1; - for (int i = 0; i < totalTokenRanges * numberOfSplits * tables.size(); i++) - { - if (i % (totalTokenRanges * numberOfSplits) == 0) - { - expectedTableIndex++; - } - } - - expectedTableIndex = -1; - // should be a set of ranges for each table. - for (int i = 0; i < totalTokenRanges * numberOfSplits * tables.size(); i++) + int assignmentsPerTable = numTokens * numberOfSplits; + for (int i = 0; i < tables.size(); i++) { - if (i % (totalTokenRanges * numberOfSplits) == 0) + List assignmentForATable = new ArrayList<>(); + List> expectedTokensForATable = new ArrayList<>(); + for (int j = 0; j < assignmentsPerTable; j++) { - expectedTableIndex++; + assertEquals(Arrays.asList(tables.get(i)), assignments.get(i*assignmentsPerTable + j).getTableNames()); + assignmentForATable.add(assignments.get(i*assignmentsPerTable+j)); + expectedTokensForATable.add(expectedToken.get(i*assignmentsPerTable+j)); } - assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); - assertEquals(Arrays.asList(tables.get(expectedTableIndex)), assignments.get(i).getTableNames()); + compare(numberOfSplits, expectedTokensForATable, assignmentForATable); } } @Test public void testTokenRangesSplitByKeyspace() { + int numberOfSplits = calcSplits(numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(totalTokenRanges, tokens.size()); + assertEquals(numTokens, tokens.size()); List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken 
= new ArrayList<>(); for (Range range : tokens) @@ -182,7 +171,7 @@ public void testTokenRangesSplitByKeyspace() List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); - Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRange))) + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges))) .getRepairAssignments(true, plan); // should be only 1 entry for the keyspace. @@ -193,23 +182,23 @@ public void testTokenRangesSplitByKeyspace() List assignments = keyspace.getRepairAssignments(); assertNotNull(assignments); - assertEquals(totalTokenRanges * numberOfSplits, assignments.size()); + assertEquals(numTokens * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - // should only be one set of ranges for the entire keyspace. 
- for (int i = 0; i < totalTokenRanges * numberOfSplits; i++) - { - assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); - assertEquals(tables, assignments.get(i).getTableNames()); - } + compare(numberOfSplits, expectedToken, assignments); } @Test - public void testTokenRangesNoSplitByDefault() + public void testTokenRangesWithDefaultSplit() { + int numberOfSplits = calcSplits(DEFAULT_SUBRANGES); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); - assertEquals(totalTokenRanges, tokens.size()); - List> expectedToken = new ArrayList<>(tokens); + assertEquals(numTokens, tokens.size()); + List> expectedToken = new ArrayList<>(); + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1); @@ -224,10 +213,26 @@ public void testTokenRangesNoSplitByDefault() assertNotNull(assignments); // should be 3 entries for the table which covers each token range. 
- assertEquals(totalTokenRanges, assignments.size()); - for (int i = 0; i < totalTokenRanges; i++) + assertEquals(numTokens * numberOfSplits, assignments.size()); + + compare(numberOfSplits, expectedToken, assignments); + } + + private void compare(int numberOfSplits, List> expectedToken, List assignments) + { + assertEquals(expectedToken.size(), assignments.size()); + Set> a = new TreeSet<>(); + Set> b = new TreeSet<>(); + for (int i = 0; i < numTokens * numberOfSplits; i++) { - assertEquals(expectedToken.get(i), assignments.get(i).getTokenRange()); + a.add(expectedToken.get(i)); + b.add(assignments.get(i).getTokenRange()); } + assertEquals(a, b); + } + + private int calcSplits(int subRange) + { + return Math.max(1, subRange / numTokens); } } From c56204f5e595de18f3b01be43a96c8d694239d46 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 14:14:31 -0700 Subject: [PATCH 224/257] Add v-nodes and non v-nodes test cases --- .../FixedSplitTokenRangeSplitter.java | 4 +- ...> FixedSplitTokenRangeSplitterHelper.java} | 96 +++------------- ...edSplitTokenRangeSplitterNoVNodesTest.java | 108 ++++++++++++++++++ ...ixedSplitTokenRangeSplitterVNodesTest.java | 108 ++++++++++++++++++ 4 files changed, 236 insertions(+), 80 deletions(-) rename test/unit/org/apache/cassandra/repair/autorepair/{FixedSplitTokenRangeSplitterTest.java => FixedSplitTokenRangeSplitterHelper.java} (66%) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index e571b025a271..953216832f1f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ 
b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -54,7 +54,7 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt *

        * To keep it balances, 32 serves a good default that would cater to both v-nodes and non v-nodes. */ - public static final int DEFAULT_SUBRANGES = 32; + public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; /** * The number of subranges to split each to-be-repaired token range into. Defaults to 1. @@ -80,7 +80,7 @@ public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map< { this.repairType = repairType; - numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_SUBRANGES))); + numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_NUMBER_OF_SUBRANGES))); } @Override diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java similarity index 66% rename from test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java rename to test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index c617771c88b1..4b4f41375f8c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -27,92 +27,34 @@ import java.util.Set; import java.util.TreeSet; -import org.junit.BeforeClass; -import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.BootStrapper; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.sai.disk.format.Version; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; -import 
org.apache.cassandra.tcm.ClusterMetadata; -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; -import static org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_SUBRANGES; +import static org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_NUMBER_OF_SUBRANGES; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** - * Unit tests for {@link org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter} + * Helper class for for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} */ @RunWith(Parameterized.class) -public class FixedSplitTokenRangeSplitterTest +public class FixedSplitTokenRangeSplitterHelper { - private static final String KEYSPACE = "ks"; private static final String TABLE1 = "tbl1"; private static final String TABLE2 = "tbl2"; private static final String TABLE3 = "tbl3"; + public static final String KEYSPACE = "ks"; - @Parameterized.Parameter(0) - public AutoRepairConfig.RepairType repairType; - - @Parameterized.Parameter(1) - public int numberOfSubRanges; - - @Parameterized.Parameter(2) - public int numTokens; - - public static final int curTokenRange = 1; - - @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}, numTokens={2}") - public static Collection parameters() - { - List params = new ArrayList<>(); - for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) - { - for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) - { - for (int numToken : Arrays.asList(curTokenRange)) - { - params.add(new Object[]{ type, subRange, numToken }); - } - } - } - return params; - } - - @BeforeClass 
- public static void setupClass() throws Exception - { - setupSeed(); - updateConfigs(); - DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); - ServerTestUtils.prepareServerNoRegister(); - - Set tokens1 = BootStrapper.getRandomTokens(ClusterMetadata.current(), curTokenRange); - ServerTestUtils.registerLocal(tokens1); - // Ensure that the on-disk format statics are loaded before the test run - Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); - - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); - } - - @Test - public void testTokenRangesSplitByTable() + public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { - int numberOfSplits = calcSplits(numberOfSubRanges); + int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); @@ -147,18 +89,17 @@ public void testTokenRangesSplitByTable() List> expectedTokensForATable = new ArrayList<>(); for (int j = 0; j < assignmentsPerTable; j++) { - assertEquals(Arrays.asList(tables.get(i)), assignments.get(i*assignmentsPerTable + j).getTableNames()); - assignmentForATable.add(assignments.get(i*assignmentsPerTable+j)); - expectedTokensForATable.add(expectedToken.get(i*assignmentsPerTable+j)); + assertEquals(Arrays.asList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); + assignmentForATable.add(assignments.get(i * assignmentsPerTable + j)); + expectedTokensForATable.add(expectedToken.get(i * assignmentsPerTable + j)); } - compare(numberOfSplits, expectedTokensForATable, assignmentForATable); + compare(numTokens, numberOfSplits, 
expectedTokensForATable, assignmentForATable); } } - @Test - public void testTokenRangesSplitByKeyspace() + public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { - int numberOfSplits = calcSplits(numberOfSubRanges); + int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); @@ -185,13 +126,12 @@ public void testTokenRangesSplitByKeyspace() assertEquals(numTokens * numberOfSplits, assignments.size()); assertEquals(expectedToken.size(), assignments.size()); - compare(numberOfSplits, expectedToken, assignments); + compare(numTokens, numberOfSplits, expectedToken, assignments); } - @Test - public void testTokenRangesWithDefaultSplit() + public static void testTokenRangesWithDefaultSplit(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { - int numberOfSplits = calcSplits(DEFAULT_SUBRANGES); + int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); List> expectedToken = new ArrayList<>(); @@ -215,10 +155,10 @@ public void testTokenRangesWithDefaultSplit() // should be 3 entries for the table which covers each token range. 
assertEquals(numTokens * numberOfSplits, assignments.size()); - compare(numberOfSplits, expectedToken, assignments); + compare(numTokens, numberOfSplits, expectedToken, assignments); } - private void compare(int numberOfSplits, List> expectedToken, List assignments) + private static void compare(int numTokens, int numberOfSplits, List> expectedToken, List assignments) { assertEquals(expectedToken.size(), assignments.size()); Set> a = new TreeSet<>(); @@ -231,7 +171,7 @@ private void compare(int numberOfSplits, List> expectedToken, List< assertEquals(a, b); } - private int calcSplits(int subRange) + private static int calcSplits(int numTokens, int subRange) { return Math.max(1, subRange / numTokens); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java new file mode 100644 index 000000000000..a92c7d2bc69b --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; + +/** + * Unit tests for a setup that does not have v-nodes {@link FixedSplitTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class FixedSplitTokenRangeSplitterNoVNodesTest +{ + private static final int numTokens = 1; + + @Parameterized.Parameter(0) + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public int numberOfSubRanges; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}") + public static Collection parameters() + { + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + { + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; + } + + @BeforeClass + public static void setupClass() throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = 
BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + + @Test + public void testTokenRangesSplitByTable() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByTable(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByKeyspace(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesWithDefaultSplit() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java new file mode 100644 index 000000000000..1b48537968d4 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; + +/** + * Unit tests for a setup that has v-nodes {@link FixedSplitTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class FixedSplitTokenRangeSplitterVNodesTest +{ + private static final int numTokens = 16; + + @Parameterized.Parameter(0) + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public int numberOfSubRanges; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}") + public static Collection parameters() + { + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + 
{ + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; + } + + @BeforeClass + public static void setupClass() throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + + @Test + public void testTokenRangesSplitByTable() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByTable(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByKeyspace(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesWithDefaultSplit() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + } +} From b6f760ce85404dbb3bbd39e43e27d88d30d41d0f Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 14:36:52 -0700 Subject: [PATCH 225/257] Adjust the comment --- .../autorepair/FixedSplitTokenRangeSplitter.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 953216832f1f..720e2812a5b0 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -46,13 +46,15 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); /** - * Selecting the default value is tricky. If we select a small number then individual repair would be heavy, - * on the other hand, if we select large then too many repair sessions would be created. + * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. + * On the other hand, if we select a large number, too many repair sessions would be created. *

        - * The default 64 is a good number for non v-nodes, but with v-nodes it can result in too many repair sessions - * because the number of repair sessions on a node = number_of_subranges * num_token. + * The default value of 64 works well for non-vnodes, but with vnodes, it can result in too many repair sessions + * because the total number of repair sessions on a node is calculated as: *

        - * To keep it balances, 32 serves a good default that would cater to both v-nodes and non v-nodes. + * number_of_subranges * num_token + *

        + * To maintain balance, 32 serves as a good default that accommodates both vnodes and non-vnodes effectively. */ public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; From 913e50301d66b1a69bbfcf179b4578eea9bfc7e2 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 20:16:41 -0700 Subject: [PATCH 226/257] Add the missing nodetool way to update repair_by_keyspace --- .../org/apache/cassandra/service/AutoRepairService.java | 6 ++++++ .../apache/cassandra/service/AutoRepairServiceMBean.java | 3 +++ src/java/org/apache/cassandra/tools/NodeProbe.java | 5 +++++ .../cassandra/tools/nodetool/SetAutoRepairConfig.java | 6 +++++- 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 1c96ab685d2c..0cf744296e07 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -255,6 +255,12 @@ public void setAutoRepairTokenRangeSplitterParameter(String repairType, String k config.getTokenRangeSplitterInstance(RepairType.parse(repairType)).setParameter(key, value); } + @Override + public void setRepairByKeyspace(String repairType, boolean repairByKeyspace) + { + config.setRepairByKeyspace(RepairType.parse(repairType), repairByKeyspace); + } + private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig config) { StringBuilder sb = new StringBuilder(); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index f3ec919c8f1b..d57329e86795 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -68,4 +68,7 @@ public interface AutoRepairServiceMBean public Set getOnGoingRepairHostIds(String repairType); public void 
setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value); + + public void setRepairByKeyspace(String repairType, boolean repairByKeyspace); + } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index cab5153a70d3..723894caa093 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2679,6 +2679,11 @@ public Set getAutoRepairOnGoingRepairHostIds(String repairType) { return autoRepairProxy.getOnGoingRepairHostIds(repairType); } + + public void setAutoRepairRepairByKeyspace(String repairType, boolean enabled) + { + autoRepairProxy.setRepairByKeyspace(repairType, enabled); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 9f22a43c4eb9..37f809943fb6 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -48,7 +48,8 @@ public class SetAutoRepairConfig extends NodeToolCmd "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + "|parallel_repair_count|parallel_repair_percentage|materialized_view_repair_enabled|repair_max_retries" + - "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration|token_range_splitter.]", + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration" + + "|repair_by_keyspace|token_range_splitter.]", required = true) protected List args = new ArrayList<>(); @@ -159,6 +160,9 @@ public void execute(NodeProbe probe) case "repair_session_timeout": probe.setAutoRepairSessionTimeout(repairTypeStr, paramVal); break; + case "repair_by_keyspace": + 
probe.setAutoRepairRepairByKeyspace(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); } From a4d1bf3428f7e8d421c4938fdbdfca292261277b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 20:31:05 -0700 Subject: [PATCH 227/257] Use common setup code for FixedSplitTokenRangeSplitterTest*.java --- .../FixedSplitTokenRangeSplitterHelper.java | 26 +++++++++++++++++ ...edSplitTokenRangeSplitterNoVNodesTest.java | 28 +------------------ ...ixedSplitTokenRangeSplitterVNodesTest.java | 28 +------------------ 3 files changed, 28 insertions(+), 54 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index 4b4f41375f8c..5c04351325aa 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -30,11 +30,20 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; import static 
org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_NUMBER_OF_SUBRANGES; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -52,6 +61,23 @@ public class FixedSplitTokenRangeSplitterHelper private static final String TABLE3 = "tbl3"; public static final String KEYSPACE = "ks"; + public static void setupClass(int numTokens) throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) { int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java index a92c7d2bc69b..5b86728e574d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -22,26 +22,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import 
org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.BootStrapper; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadata; - -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; - /** * Unit tests for a setup that does not have v-nodes {@link FixedSplitTokenRangeSplitter} */ @@ -73,19 +59,7 @@ public static Collection parameters() @BeforeClass public static void setupClass() throws Exception { - setupSeed(); - updateConfigs(); - DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); - ServerTestUtils.prepareServerNoRegister(); - - Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); - ServerTestUtils.registerLocal(tokens); - // Ensure that the on-disk format statics are loaded before the test run - Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); - - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + FixedSplitTokenRangeSplitterHelper.setupClass(numTokens); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java index 1b48537968d4..8ca30bd2017c 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -22,26 +22,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.dht.BootStrapper; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.sai.disk.format.Version; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadata; - -import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; -import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; - /** * Unit tests for a setup that has v-nodes {@link FixedSplitTokenRangeSplitter} */ @@ -73,19 +59,7 @@ public static Collection parameters() @BeforeClass public static void setupClass() throws Exception { - setupSeed(); - updateConfigs(); - DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); - ServerTestUtils.prepareServerNoRegister(); - - Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); - ServerTestUtils.registerLocal(tokens); - // Ensure that the on-disk format statics are loaded before the test run - Version.LATEST.onDiskFormat(); - StorageService.instance.doAutoRepairSetup(); - - SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); - QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + FixedSplitTokenRangeSplitterHelper.setupClass(numTokens); } @Test From a6bf366ac67b8fb07553499833f646c2df6cbcb4 Mon Sep 
17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 9 Mar 2025 20:39:19 -0700 Subject: [PATCH 228/257] Fix FixedSplitTokenRangeSplitterHelper.java failure --- .../repair/autorepair/FixedSplitTokenRangeSplitterHelper.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index 5c04351325aa..f041873b79a7 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -27,9 +27,6 @@ import java.util.Set; import java.util.TreeSet; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; @@ -53,7 +50,6 @@ /** * Helper class for for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} */ -@RunWith(Parameterized.class) public class FixedSplitTokenRangeSplitterHelper { private static final String TABLE1 = "tbl1"; From 785aba992adce6a7fca744253c8651c0a020b716 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 10 Mar 2025 16:22:11 -0700 Subject: [PATCH 229/257] Address review comments: update comment/remove dead code --- .../repair/autorepair/FixedSplitTokenRangeSplitter.java | 6 +++--- .../autorepair/FixedSplitTokenRangeSplitterHelper.java | 7 +++---- .../FixedSplitTokenRangeSplitterNoVNodesTest.java | 2 +- .../autorepair/FixedSplitTokenRangeSplitterVNodesTest.java | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 720e2812a5b0..2879719fdb95 100644 --- 
a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -49,10 +49,10 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. * On the other hand, if we select a large number, too many repair sessions would be created. *

        - * The default value of 64 works well for non-vnodes, but with vnodes, it can result in too many repair sessions - * because the total number of repair sessions on a node is calculated as: + * The default value of 64 works well for non-vnodes, but with vnodes, it might result in too many repair sessions + * and to cap the number for vnodes, we use the following formula: *

        - * number_of_subranges * num_token + * Math.max(1, numberOfSubranges / tokens.size()) *

        * To maintain balance, 32 serves as a good default that accommodates both vnodes and non-vnodes effectively. */ diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index f041873b79a7..cfa0748f1f7f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -48,7 +48,7 @@ import static org.junit.Assert.assertTrue; /** - * Helper class for for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} + * Helper class for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} */ public class FixedSplitTokenRangeSplitterHelper { @@ -111,7 +111,7 @@ public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRan List> expectedTokensForATable = new ArrayList<>(); for (int j = 0; j < assignmentsPerTable; j++) { - assertEquals(Arrays.asList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); + assertEquals(Collections.singletonList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); assignmentForATable.add(assignments.get(i * assignmentsPerTable + j)); expectedTokensForATable.add(expectedToken.get(i * assignmentsPerTable + j)); } @@ -125,7 +125,6 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSub AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); assertEquals(numTokens, tokens.size()); - List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); for (Range range : tokens) { @@ -151,7 +150,7 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, 
int numberOfSub compare(numTokens, numberOfSplits, expectedToken, assignments); } - public static void testTokenRangesWithDefaultSplit(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) + public static void testTokenRangesWithDefaultSplit(int numTokens, AutoRepairConfig.RepairType repairType) { int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java index 5b86728e574d..a30f3aa76246 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -77,6 +77,6 @@ public void testTokenRangesSplitByKeyspace() @Test public void testTokenRangesWithDefaultSplit() { - FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, repairType); } } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java index 8ca30bd2017c..6839748d1f01 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -77,6 +77,6 @@ public void testTokenRangesSplitByKeyspace() @Test public void testTokenRangesWithDefaultSplit() { - FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, numberOfSubRanges, repairType); + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, 
repairType); } } From 75b45f030212fd3f7c64af603b5eac175367fb6e Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Tue, 11 Mar 2025 09:43:55 -0700 Subject: [PATCH 230/257] Adjust comment for DEFAULT_NUMBER_OF_SUBRANGES and NUMBER_OF_SUBRANGES --- .../FixedSplitTokenRangeSplitter.java | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java index 2879719fdb95..a6dddb3060bb 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -49,8 +49,8 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. * On the other hand, if we select a large number, too many repair sessions would be created. *

        - * The default value of 64 works well for non-vnodes, but with vnodes, it might result in too many repair sessions - * and to cap the number for vnodes, we use the following formula: + * If vnodes are configured using num_tokens, attempts to evenly subdivide subranges by each range + * using the following formula: *

        * Math.max(1, numberOfSubranges / tokens.size()) *

        @@ -59,19 +59,12 @@ public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitt public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; /** - * The number of subranges to split each to-be-repaired token range into. Defaults to 1. + * Number of evenly split subranges to create for each node that repair runs for. *

        - * The higher this number, the smaller the repair sessions will be. - *

        - * If you are using vnodes, say 256, then the repair will always go one vnode range at a time. This parameter, - * additionally, will let us further subdivide a given vnode range into subranges. - *

        - * With the value "1" and vnodes of 256, a given table on a node will undergo the repair 256 times. But with a - * value "2", the same table on a node will undergo a repair 512 times because every vnode range will be further - * divided by two. - *

        - * If you do not use vnodes or the number of vnodes is pretty small, say 8, setting this value to a higher number, - * such as 16, will be useful to repair on a smaller range, and the chance of succeeding is higher. + * If vnodes are configured using num_tokens, attempts to evenly subdivide subranges by each range. + * For example, for num_tokens: 16 and number_of_subranges: 32, 2 (32/16) + * repair assignments will be created for each token range. At least one repair assignment will be + * created for each token range. */ static final String NUMBER_OF_SUBRANGES = "number_of_subranges"; From c9b4065c3bfd4d2f627bb19c4aceb8c94acfcfcb Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Fri, 7 Mar 2025 23:06:35 -0600 Subject: [PATCH 231/257] Auto Repair Documentation improvements, bytes_per_assignment to 50GiB Updates auto repair documentation and moves it under managing/operating/auto_repair.adoc. Updates existing repair documentation to reference the auto repair documentation. Updates bytes_per_assignment default to 50GiB (from 200GiB) for all repair types to be more conservative. Clean up some syntactical issues and formatting in cassandra.yaml files.
patch by Andy Tolbert; reviewed by ____ for CASSANDRA-20421 --- conf/cassandra.yaml | 69 ++- conf/cassandra_latest.yaml | 68 ++- doc/modules/cassandra/nav.adoc | 3 +- .../cassandra/pages/auto-repair/overview.adoc | 211 --------- .../pages/managing/operating/auto_repair.adoc | 410 ++++++++++++++++++ .../operating/compaction/tombstones.adoc | 3 +- .../pages/managing/operating/index.adoc | 3 +- .../pages/managing/operating/repair.adoc | 16 +- .../repair/autorepair/AutoRepairConfig.java | 9 +- .../autorepair/RepairTokenRangeSplitter.java | 147 ++----- .../autorepair/AutoRepairConfigTest.java | 13 +- 11 files changed, 567 insertions(+), 385 deletions(-) delete mode 100644 doc/modules/cassandra/pages/auto-repair/overview.adoc create mode 100644 doc/modules/cassandra/pages/managing/operating/auto_repair.adoc diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 39e0ab776964..57f053c7c07a 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2679,9 +2679,15 @@ storage_compatibility_mode: NONE # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. # if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 -# incremental_repair_disk_headroom_reject_ratio = 0.2; +# incremental_repair_disk_headroom_reject_ratio: 0.2; -# Configuration for AutoRepair Scheduler. +# Configuration for Auto Repair Scheduler. +# +# This feature is disabled by default. +# +# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this +# feature. +# # auto_repair: # # Enable/Disable the auto-repair scheduler. # # If set to false, the scheduler thread will not be started. @@ -2693,17 +2699,33 @@ storage_compatibility_mode: NONE # full: # # Enable/Disable full auto-repair # enabled: true -# # Minimum duration between repairing the same node again. 
This is useful for tiny clusters, such as -# # clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler -# # completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node -# # until 24 hours have passed since the last repair. +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, +# # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one +# # round on all nodes in less than this duration, it will not start a new repair round on a given node until +# # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce +# # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than +# # gc_grace_seconds to avoid zombies. # min_repair_interval: 24h # token_range_splitter: +# # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges +# # for repair assignments. +# # +# # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter, +# # FixedSplitTokenRangeSplitter}. +# # +# # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition +# # count. +# # - FixedSplitTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option. +# # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# +# # Optional parameters can be specified in the form of: +# # parameters: +# # param_key1: param_value1 # parameters: # # The target and maximum amount of bytes that should be included in a repair # # assignment. This scopes the amount of work involved in a repair and includes # # the data covering the range being repaired. -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule.
This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this # # value if the impact of repairs is causing too much load on the cluster or increase it @@ -2712,29 +2734,32 @@ storage_compatibility_mode: NONE # # This is set to a large value for full repair to attempt to repair all data per repair schedule. # max_bytes_per_schedule: 100000GiB # incremental: -# enabled: true -# # Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps -# # the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent -# # schedule such as 1h is recommended. -# # When turning on incremental repair for the first time with a decent amount of data it may be advisable to -# # increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. -# min_repair_interval: 1h +# enabled: false +# # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair +# # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +# # so a more frequent schedule such as 1h is recommended. +# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is advisable to +# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental +# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired +# # set small. +# min_repair_interval: 24h # token_range_splitter: # parameters: # # Configured to attempt repairing 50GiB of data per repair. -# # This throttles the amount of incremental repair and anticompaction done per schedule -# # after incremental repairs are turned on. +# # This throttles the amount of incremental repair and anticompaction done per schedule after incremental +# # repairs are turned on.
# bytes_per_assignment: 50GiB -# # The maximum number of bytes to cover in an individual schedule to 100GiB. -# # Consider increasing max_bytes_per_schedule if -# # more data is written than this limit within the min_repair_interval. +# # Restricts the maximum number of bytes to cover in an individual schedule to 100GiB. +# # Consider increasing this if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: -# enabled: true +# # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired +# # data set. +# enabled: false # min_repair_interval: 24h # token_range_splitter: # parameters: -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # max_bytes_per_schedule: 100000GiB # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. @@ -2760,7 +2785,7 @@ storage_compatibility_mode: NONE # # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. # parallel_repair_count: 3 # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value -# # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. +# # is used. # parallel_repair_percentage: 3 # # Repairs materialized views if true. # materialized_view_repair_enabled: false diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index da44d4f7b999..e3d0c2e89d23 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2374,9 +2374,15 @@ storage_compatibility_mode: NONE # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily.
# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) # then set the ratio to 0.0 -# incremental_repair_disk_headroom_reject_ratio = 0.2; +# incremental_repair_disk_headroom_reject_ratio: 0.2; -# Configuration for AutoRepair Scheduler. +# Configuration for Auto Repair Scheduler. +# +# This feature is disabled by default. +# +# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this +# feature. +# # auto_repair: # # Enable/Disable the auto-repair scheduler. # # If set to false, the scheduler thread will not be started. @@ -2388,17 +2394,33 @@ storage_compatibility_mode: NONE # full: # # Enable/Disable full auto-repair # enabled: true -# # Minimum duration between repairing the same node again. This is useful for tiny clusters, such as -# # clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler -# # completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node -# # until 24 hours have passed since the last repair. +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, +# # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one +# # round on all nodes in less than this duration, it will not start a new repair round on a given node until +# # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce +# # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than +# # gc_grace_seconds to avoid zombies. # min_repair_interval: 24h # token_range_splitter: +# # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges +# # for repair assignments. 
+# # +# # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter, +# # FixedTokenRangeSplitter}. +# # +# # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition +# # count. +# # - FixedTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option. +# # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# +# # Optional parameters can be specified in the form of: +# # parameters: +# # param_key1: param_value1 # parameters: # # The target and maximum amount of bytes that should be included in a repair # # assignment. This scopes the amount of work involved in a repair and includes # # the data covering the range being repaired. -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this # # value if the impact of repairs is causing too much load on the cluster or increase it @@ -2407,29 +2429,33 @@ storage_compatibility_mode: NONE # # This is set to a large value for full repair to attempt to repair all data per repair schedule. # max_bytes_per_schedule: 100000GiB # incremental: +# # Enable incremental repair by default for new clusters. # enabled: true -# # Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps -# # the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent -# # schedule such as 1h is recommended. -# # When turning on incremental repair for the first time with a decent amount of data it may be advisable to -# # increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. -# min_repair_interval: 1h +# # Incremental repairs operate over unrepaired data and should finish quickly. 
Running incremental repair +# # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +# # so a more frequent schedule such as 1h is recommended. +# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is advisable to +# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental +# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired +# # set small. +# min_repair_interval: 24h # token_range_splitter: # parameters: # # Configured to attempt repairing 50GiB of data per repair. -# # This throttles the amount of incremental repair and anticompaction done per schedule -# # after incremental repairs are turned on. +# # This throttles the amount of incremental repair and anticompaction done per schedule after incremental +# # repairs are turned on. # bytes_per_assignment: 50GiB -# # The maximum number of bytes to cover in an individual schedule to 100GiB. -# # Consider increasing max_bytes_per_schedule if -# # more data is written than this limit within the min_repair_interval. +# # Restricts the maximum number of bytes to cover in an individual schedule to 100GiB. +# # Consider increasing this if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: -# enabled: true +# # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired +# # data set. +# enabled: false # min_repair_interval: 24h # token_range_splitter: # parameters: -# bytes_per_assignment: 200GiB +# bytes_per_assignment: 50GiB # max_bytes_per_schedule: 100000GiB # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. @@ -2455,7 +2481,7 @@ storage_compatibility_mode: NONE # # Number of nodes running repair in parallel.
If parallel_repair_percentage is set, the larger value is used. # parallel_repair_count: 3 # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value -# # is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. +# # is used. # parallel_repair_percentage: 3 # # Repairs materialized views if true. # materialized_view_repair_enabled: false diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc index dd9dd1054df9..311bfb16c585 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -99,6 +99,7 @@ ***** xref:cassandra:managing/operating/fqllogging.adoc[Full query logging] **** xref:cassandra:managing/operating/metrics.adoc[Monitoring metrics] **** xref:cassandra:managing/operating/repair.adoc[Repair] +**** xref:cassandra:managing/operating/auto_repair.adoc[Auto Repair] **** xref:cassandra:managing/operating/read_repair.adoc[Read repair] **** xref:cassandra:managing/operating/security.adoc[Security] **** xref:cassandra:managing/operating/snitch.adoc[Snitches] @@ -126,4 +127,4 @@ *** xref:reference/static.adoc[Static columns] *** xref:reference/vector-data-type.adoc[Vector data type] -** xref:integrating/plugins/index.adoc[] \ No newline at end of file +** xref:integrating/plugins/index.adoc[] diff --git a/doc/modules/cassandra/pages/auto-repair/overview.adoc b/doc/modules/cassandra/pages/auto-repair/overview.adoc deleted file mode 100644 index 698e53c44677..000000000000 --- a/doc/modules/cassandra/pages/auto-repair/overview.adoc +++ /dev/null @@ -1,211 +0,0 @@ -= Overview of Auto Repair -:navtitle: Auto Repair overview -:description: Auto Repair concepts - How it works, how to configure it, and more. -:keywords: CEP-37 - -Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. 
This -significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and -manage repairs. - -At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler in Cassandra -maintains a new replicated table, system_distributed.auto_repair_history, which stores the repair history for all nodes, -including details such as the last repair time. The scheduler selects the node(s) to begin repairs and orchestrates the -process to ensure that every table and its token ranges are repaired. The algorithm can run repairs simultaneously on -multiple nodes and splits token ranges into subranges, with necessary retries to handle transient failures. Automatic -repair starts as soon as a Cassandra cluster is launched, similar to compaction, and does not require human -intervention. - -The scheduler currently supports Full, Incremental, and Preview repair types with the following features. New repair -types, such as Paxos repair or other future repair mechanisms, can be integrated with minimal development effort! - - -=== Features -- Capability to run repairs on multiple nodes simultaneously. -- A default implementation and an interface to override the dataset being repaired per session. -- Extendable token split algorithms with two implementations readily available: -. Splits token ranges by placing a cap on the size of data repaired in one session and a maximum cap at the schedule -level (default). -. Splits tokens evenly based on the specified number of splits. -- A new CQL table property offering: -. The ability to disable specific repair types at the table level, allowing the scheduler to skip one or more tables. -. Configuring repair priorities for certain tables to prioritize them over others. -- Dynamic enablement or disablement of the scheduler for each repair type. -- Configurable settings tailored to each repair job. 
-- Rich configuration options for each repair type (e.g., Full, Incremental, or Preview repairs). -- Comprehensive observability features that allow operators to configure alarms as needed. - -=== Yaml Configuration - -==== Top level settings -The following settings are defined at the top level of the configuration file and apply universally across all -repair types. - -[cols=",,",options="header",] -|=== -| Name | Default | Description -| enabled | false | Enable/Disable the auto-repair scheduler. If set to false, the scheduler thread will not be started. -If set to true, the repair scheduler thread will be created. The thread will check for secondary configuration available -for each repair type (full, incremental, and preview_repaired), and based on that, it will schedule repairs. -| repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it -is time to schedule repairs. -| repair_max_retries | 3 | Maximum number of retries for a repair session. -| repair_retry_backoff | 30s | Backoff time before retrying a repair session. -| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the -scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. -| history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. -Deleted hosts are tracked in metadata. for a specified duration to ensure they are indeed removed before adjustments -are made to the schedule. -|=== - - -==== Repair level settings -The following settings can be tailored individually for each repair type. -[cols=",,",options="header",] -|=== -| Name | Default | Description -| enabled | false | Enable/Disable full/incremental/preview_repaired auto-repair -| repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, -each table is repaired individually. 
-| number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler. Similar to -the -j option in nodetool repair. -| parallel_repair_count | 3 | Number of nodes running repair in parallel. If parallel_repair_percentage is set, the -larger value is used. -| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If -parallel_repair_count is set, the larger value is used. Recommendation is that the repair cycle on the cluster should -finish within gc_grace_seconds. -| materialized_view_repair_enabled | false | Repairs materialized views if true. -| initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting -immediately after a restart. -| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. -| force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. -| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 -SSTables to avoid penalizing good tables. -| table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair -proceeds to the next table. -| ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify -data centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded data -centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to not run repair -schedule in certain data centers. -| repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option -in nodetool repair. Defaults to true. General advice is to keep this true. -| token_range_splitter | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Splitter implementation to -generate repair assignments. 
Defaults to RepairTokenRangeSplitter. -| token_range_splitter.partitions_per_assignment | 1048576 | Maximum number of partitions to include in a repair -assignment. Used to reduce number of partitions present in merkle tree leaf nodes to avoid overstreaming. -| token_range_splitter.max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment. -This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables -together if they exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. -|=== - -==== Full & Preview_Repaired repair level settings -The following settings can be tailored individually for each repair type. -[cols=",,",options="header",] -|=== -| min_repair_interval | 24h | Minimum duration between repairing the same node again. This is useful for tiny clusters, -such as clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler -completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node until 24 -hours have passed since the last repair. -| token_range_splitter.bytes_per_assignment | 200GiB | The target and maximum amount of bytes that should be included -in a repair assignment. This scopes the amount of work involved in a repair and includes the data covering the range -being repaired. -| token_range_splitter.max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual -schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the -impact of repairs is causing too much load on the cluster or increase it if writes outpace the amount of data being -repaired. Alternatively, adjust the min_repair_interval. This is set to a large value for full repair to attempt to -repair all data per repair schedule. 
-|=== - -==== Incremental repair level settings -The following settings can be customized for each repair type. -[cols=",,",options="header",] -|=== -| min_repair_interval | 1h | Incremental repairs operate over unrepaired data and should finish quickly. Running them -more frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, -so a more frequent schedule such as 1h is recommended. When turning on incremental repair for the first time with a -decent amount of data it may be advisable to increase this interval to 24h or longer to reduce the impact of -anticompaction caused by incremental repair. -| token_range_splitter.bytes_per_assignment | 50GiB | Configured to attempt repairing 50GiB of data per repair. -This throttles the amount of incremental repair and anticompaction done per schedule after incremental repairs are -turned on. -| token_range_splitter.max_bytes_per_schedule | 100GiB | The maximum number of bytes to cover in an individual schedule -to 100GiB. Consider increasing max_bytes_per_schedule if more data is written than this limit within -the min_repair_interval. 
-|=== - - -=== Nodetool Configuration -==== nodetool getautorepairconfig -``` -$> nodetool getautorepairconfig -repair scheduler configuration: - repair_check_interval: 5m - repair_max_retries: 3 - repair_retry_backoff: 30s - repair_task_min_duration: 5s - history_clear_delete_hosts_buffer_interval: 2h -configuration for repair_type: full - enabled: true - min_repair_interval: 24h - repair_by_keyspace: true - number_of_repair_threads: 1 - sstable_upper_threshold: 10000 - table_max_repair_time: 6h - ignore_dcs: [] - repair_primary_token_range_only: true - parallel_repair_count: 3 - parallel_repair_percentage: 3 - materialized_view_repair_enabled: false - initial_scheduler_delay: 5m - repair_session_timeout: 3h - force_repair_new_node: false - token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter - token_range_splitter.bytes_per_assignment: 200GiB - token_range_splitter.partitions_per_assignment: 1048576 - token_range_splitter.max_tables_per_assignment: 64 - token_range_splitter.max_bytes_per_schedule: 100000GiB -configuration for repair_type: incremental - enabled: true - min_repair_interval: 1h - repair_by_keyspace: true - number_of_repair_threads: 1 - sstable_upper_threshold: 10000 - table_max_repair_time: 6h - ignore_dcs: [] - repair_primary_token_range_only: true - parallel_repair_count: 3 - parallel_repair_percentage: 3 - materialized_view_repair_enabled: false - initial_scheduler_delay: 5m - repair_session_timeout: 3h - force_repair_new_node: false - token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter - token_range_splitter.bytes_per_assignment: 50GiB - token_range_splitter.partitions_per_assignment: 1048576 - token_range_splitter.max_tables_per_assignment: 64 - token_range_splitter.max_bytes_per_schedule: 100GiB -configuration for repair_type: preview_repaired - enabled: false -``` - -==== nodetool autorepairstatus -``` -$> nodetool autorepairstatus -t incremental -Active Repairs 
-425cea55-09aa-46e0-8911-9f37a4424574 - - -$> nodetool autorepairstatus -t full -Active Repairs -NONE - -``` - -==== nodetool setautorepairconfig -``` -$> nodetool setautorepairconfig -t incremental number_of_repair_threads 2 -``` - - - -==== More details -https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution[CEP-37] \ No newline at end of file diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc new file mode 100644 index 000000000000..db50a8c4524b --- /dev/null +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -0,0 +1,410 @@ += Auto Repair +:navtitle: Auto Repair +:description: Auto Repair concepts - How it works, how to configure it, and more. +:keywords: CEP-37, Repair, Incremental, Preview + +Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This +significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and +manage repairs. + +At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler in Cassandra +maintains a new replicated table, `system_distributed.auto_repair_history`, which stores the repair history for all +nodes, including details such as the last repair time. The scheduler selects the node(s) to begin repairs and +orchestrates the process to ensure that every table and its token ranges are repaired. + +The algorithm can run repairs simultaneously on multiple nodes and splits token ranges into subranges, with necessary +retries to handle transient failures. Automatic repair starts as soon as a Cassandra cluster is launched, similar to +compaction, and if configured appropriately, does not require human intervention. + +The scheduler currently supports Full, Incremental, and Preview repair types with the following features. 
New repair +types, such as Paxos repair or other future repair mechanisms, can be integrated with minimal development effort! + + +== Features +- Capability to run repairs on multiple nodes simultaneously. +- A default implementation and an interface to override the dataset being repaired per session. +- Extendable token split algorithms with two implementations readily available: +. Splits token ranges by placing a cap on the size of data repaired in one session and a maximum cap at the schedule +level using xref:#repair-token-range-splitter[RepairTokenRangeSplitter] (default). +. Splits tokens evenly based on the specified number of splits using +xref:#fixed-split-token-range-splitter[FixedSplitTokenRangeSplitter]. +- A new xref:#table-configuration[CQL table property] (`auto_repair`) offering: +. The ability to disable specific repair types at the table level, allowing the scheduler to skip one or more tables. +. Configuring repair priorities for certain tables to prioritize them over others. +- Dynamic enablement or disablement of the scheduler for each repair type. +- Configurable settings tailored to each repair job. +- Rich configuration options for each repair type (e.g., Full, Incremental, or Preview repairs). +- Comprehensive observability features that allow operators to configure alarms as needed. + +== Considerations + +Before enabling auto repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base +understanding of repairs. + +=== Full Repair + +Full Repairs operate over all data in the token range being repaired. It is therefore important to run full repair +with a longer schedule and with smaller assignments. + +=== Incremental Repair + +When enabled from the inception of a cluster, incremental repairs operate over unrepaired data and should finish +quickly when run more frequently. 
+ +Once incremental repair has been run, SSTables will be separated between data that have been incrementally repaired +and data that have not. Therefore, it is important to continually run incremental repair once it has been enabled so +newly written data can be compacted together with previously repaired data, allowing overwritten and expired data to +be eventually purged. + +Running incremental repair more frequently keeps the unrepaired set smaller and thus causes repairs to operate over +a smaller set of data, so a shorter `min_repair_interval` such as `1h` is recommended for new clusters. + +NOTE: When turning on incremental repair for the first time on an existing cluster with a large amount of data it is +advisable to use a larger interval such as `24h` or longer to reduce the impact of anticompaction caused by incremental +repair. Once the entire token range has been repaired, one may consider reducing this to a shorter interval. It is +strongly recommended to not enable incremental repair for an existing cluster unless full repair has previously +actively run. + +=== Previewing Repaired Data + +The `preview_repaired` repair type executes repairs over the repaired data set to detect possible data inconsistencies. + +Inconsistencies in the repaired data set should not happen in practice and could indicate a possible bug in incremental +repair. + +Running preview repairs is useful when considering using the +xref:cassandra:managing/operating/compaction/tombstones.adoc#deletion[only_purge_repaired_tombstones] table compaction +option to prevent data from possibly being resurrected when inconsistent replicas are missing tombstones from deletes. + +When enabled, the `BytesPreviewedDesynchronized` and `TokenRangesPreviewedDesynchronized` +xref:cassandra:managing/operating/metrics.adoc#table-metrics[table metrics] can be used to detect inconsistencies in the +repaired data set.
+ +== Configuring auto repair in cassandra.yaml + +Configuration for auto repair is managed in the `cassandra.yaml` file by the `auto_repair` property. + +A rich set of configuration exists for configuring auto repair with sensible defaults. However, the expectation +is that some tuning might be needed particularly when it comes to tuning how often repair should run +(`min_repair_interval`) and how repair assignments are created (`token_range_splitter`). + +The following is a practical example of an auto_repair configuration that one might use. + +[source, yaml] +---- +auto_repair: + enabled: true + repair_type_overrides: + full: + enabled: true + min_repair_interval: 5d + incremental: + enabled: true + min_repair_interval: 1h + token_range_splitter: + parameters: + bytes_per_assignment: 50GiB + max_bytes_per_schedule: 100GiB + preview_repaired: + enabled: true + min_repair_interval: 1d + global_settings: + repair_by_keyspace: true + parallel_repair_count: 1 +---- + + +=== Top level settings +The following settings are defined at the top level of the configuration file and apply universally across all +repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Enable/Disable the auto-repair scheduler. If set to false, the scheduler thread will not be started. +If set to true, the repair scheduler thread will be created. The thread will check for secondary configuration available +for each repair type (full, incremental, and preview_repaired), and based on that, it will schedule repairs. +| repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it +is time to schedule repairs. +| repair_max_retries | 3 | Maximum number of retries for a repair session. +| repair_retry_backoff | 30s | Backoff time before retrying a repair session. +| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task.
This prevents the +scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. +| history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. +Deleted hosts are tracked in metadata for a specified duration to ensure they are indeed removed before adjustments +are made to the schedule. +|=== + + +=== Repair level settings +The following settings can be configured globally using `global_settings` or tailored individually for each repair +type by using `repair_type_overrides`. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Whether the given repair types should be enabled +| min_repair_interval | 24h | Minimum duration between repairing the same node again. This is useful for tiny clusters, +such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one round on all +nodes in less than this duration, it will not start a new repair round on a given node until this much time has +passed since the last repair completed. Consider increasing to a larger value to reduce the impact of repairs, +however *note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to +avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[zombies]*. +| class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of +IAutoRepairTokenRangeSplitter to use; responsible for splitting token ranges for repair assignments. Out of the box, +Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter,FixedSplitTokenRangeSplitter}. +| repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, +each table is repaired individually. +| number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler.
Similar to +the -j option in nodetool repair. +| parallel_repair_count | 3 | Number of nodes running repair in parallel. If `parallel_repair_percentage` is set, the +larger value is used. +| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If +`parallel_repair_count` is set, the larger value is used. +| materialized_view_repair_enabled | false | Repairs materialized views if true. +| initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting +immediately after a restart. +| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. +| force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. +| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. +| table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair +proceeds to the next table. +| ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify +data centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded data +centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to not run repair +schedule in certain data centers. +| repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option +in nodetool repair. General advice is to keep this true. +|=== + +=== `RepairTokenRangeSplitter` configuration +[#repair-token-range-splitter] + +`RepairTokenRangeSplitter` is the default implementation of `IAutoRepairTokenRangeSplitter` that attempts to create +token range assignments meeting the following goals: + +1. *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic.
If a +node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact of +disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration becoming +the main bottleneck. + +2. *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this +might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide partitions— +can lead to issues with disk usage and higher compaction costs. + +3. *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. +If a repair covers too many partitions, the tree’s leaves represent larger data ranges. Even a small change in a leaf +can trigger excessive data streaming, making the process inefficient. + +4. *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a +single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when dealing with +hundreds of tables. Running individual repairs for each table can significantly impact performance and efficiency. + +To achieve these goals, this implementation inspects SSTable metadata to estimate the bytes and number of partitions +within a range and splits it accordingly to bound the size of the token ranges used for repair assignments. + +==== Parameter defaults + +The following `parameters` include the same defaults for all repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| partitions_per_assignment | 1048576 | Maximum number of partitions to include in a repair +assignment. Used to reduce number of partitions present in merkle tree leaf nodes to avoid overstreaming. +| max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment.
+This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables +together if they exceed other configuration parameters like `bytes_per_assignment` or `partitions_per_assignment`. +|=== + +==== Full & Preview Repaired repair defaults + +The following `parameters` defaults are established for both `full` and `preview_repaired` repair scheduling: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| bytes_per_assignment | 50GiB | The target and maximum amount of *compressed* bytes that should be included in a +repair assignment. *Note*: For full and preview_repaired, only the portion of an SSTable that covers the ranges +being repaired is accounted for in this calculation. +| max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual +schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the +impact of repairs is causing too much load on the cluster, or increase it if writes outpace the amount of data being +repaired. Alternatively, adjust the `min_repair_interval`. This is set to a large value for full repair to attempt to +repair all data per repair schedule. +|=== + +==== Incremental repair defaults + +The following `parameters` defaults are established for `incremental` repair scheduling: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| bytes_per_assignment | 50GiB | The target and maximum amount of *compressed* bytes that should be +included in a repair assignment. *Note*: For incremental repair, the *entire size* of *unrepaired* SSTables +including ranges being repaired is accounted for in this calculation. This is to account for the anticompaction +work required to split the candidate data to repair from the data that won't be repaired. +| max_bytes_per_schedule | 100GiB | The maximum number of bytes to cover in an individual schedule.
+Consider increasing if more data is written than this limit within the `min_repair_interval`. +|=== + +=== `FixedSplitTokenRangeSplitter` configuration +[#fixed-split-token-range-splitter] + +`FixedSplitTokenRangeSplitter` is a simpler implementation of `IAutoRepairTokenRangeSplitter` that creates repair +assignments by splitting a node's token ranges into a fixed number of evenly sized splits. + +The following `parameters` apply for `FixedSplitTokenRangeSplitter` configuration: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| number_of_subranges | 64 | Number of evenly split subranges to create for each node that repair runs for. +If vnodes are configured using `num_tokens`, attempts to evenly subdivide subranges by each range. For example, for +`num_tokens: 16` and `number_of_subranges: 64`, 4 (64/16) repair assignments will be created for each token range. At +least one repair assignment will be created for each token range. +|=== + +=== Other cassandra.yaml Considerations + +==== Enable `reject_repair_compaction_threshold` + +When enabling auto_repair, it is advisable to configure the top level `reject_repair_compaction_threshold` +configuration in cassandra.yaml as a backpressure mechanism to reject new repairs on instances that have many +pending compactions. + +==== Tune `incremental_repair_disk_headroom_reject_ratio` + +By default, incremental repairs will be rejected if less than 20% of disk is available. If one wishes to be +conservative, this top level configuration could be increased to a larger value to prevent filling your data directories.
+ +== Table configuration + +If auto repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table +level, e.g.: + +[source,cql] +---- +ALTER TABLE cycling.cyclist_races +WITH auto_repair = {'incremental_enabled': 'false', 'priority': '0'}; +---- + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| priority | 0 | Indicates the priority this table should be given when issuing repairs. The higher the number, +the more priority will be given to repair the table (e.g. 3 will be repaired before 2). When `repair_by_keyspace` is +set to `true`, tables sharing the same priority may be grouped in the same repair assignment. +| full_enabled | true | Whether full repair is enabled for this table. If full.enabled is not true in cassandra.yaml +this will not be evaluated. +| incremental_enabled | true | Whether incremental repair is enabled for this table. If incremental.enabled is not +true in cassandra.yaml this will not be evaluated. +| preview_repaired_enabled | true | Whether preview repair is enabled for this table. If preview_repaired.enabled is +not true in cassandra.yaml this will not be evaluated. +|=== + +== Nodetool Configuration +=== nodetool getautorepairconfig + +Retrieves the runtime configuration of Auto Repair for the targeted node.
+ +[source,none] +---- +$> nodetool getautorepairconfig +repair scheduler configuration: + repair_check_interval: 5m + repair_max_retries: 3 + repair_retry_backoff: 30s + repair_task_min_duration: 5s + history_clear_delete_hosts_buffer_interval: 2h +configuration for repair_type: full + enabled: true + min_repair_interval: 24h + repair_by_keyspace: true + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + materialized_view_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100000GiB +configuration for repair_type: incremental + enabled: true + min_repair_interval: 1h + repair_by_keyspace: true + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + materialized_view_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100GiB +configuration for repair_type: preview_repaired + enabled: false +---- + +=== nodetool autorepairstatus + +Provides currently running Auto Repair status. 
+ +[source,none] +---- +$> nodetool autorepairstatus -t incremental +Active Repairs +425cea55-09aa-46e0-8911-9f37a4424574 + + +$> nodetool autorepairstatus -t full +Active Repairs +NONE + +---- + +=== nodetool setautorepairconfig + +Dynamic configuration changes can be made by using `setautorepairconfig`. Note that this only applies on the node being +targeted and these changes are not retained when a node is bounced. + +The following disables the `incremental` repair schedule: + +[source,none] +---- +$> nodetool setautorepairconfig -t incremental enabled false +---- + +The following adjusts the `min_repair_interval` option to `5d` specifically for the `full` repair schedule: + +[source,none] +---- +$> nodetool setautorepairconfig -t full min_repair_interval 5d +---- + +The following configures the `bytes_per_assignment` parameter for `incremental` repair's `token_range_splitter` to +`10GiB`: + +[source,none] +---- +$> nodetool setautorepairconfig -t incremental token_range_splitter.bytes_per_assignment 10GiB +---- + +==== More details +https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution[CEP-37] diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc index 9e0dcb6f7879..48b1a9b62e75 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc @@ -24,7 +24,6 @@ This approach is used instead of removing values because of the distributed natu Once an object is marked as a tombstone, queries will ignore all values that are time-stamped previous to the tombstone insertion. == Zombies - In a multi-node cluster, {cassandra} may store replicas of the same data on two or more nodes. This helps prevent data loss, but it complicates the deletion process. 
If a node receives a delete command for data it stores locally, the node tombstones the specified object and tries to pass the tombstone to other nodes containing replicas of that object. @@ -50,7 +49,7 @@ But if the node does not recover until after the grace period ends, {cassandra} After the tombstone's grace period ends, {cassandra} deletes the tombstone during compaction. -== Deletion +== Deletion After `gc_grace_seconds` has expired the tombstone may be removed (meaning there will no longer be any object that a certain piece of data was deleted). diff --git a/doc/modules/cassandra/pages/managing/operating/index.adoc b/doc/modules/cassandra/pages/managing/operating/index.adoc index 39dd508c4593..492af4dfec3b 100644 --- a/doc/modules/cassandra/pages/managing/operating/index.adoc +++ b/doc/modules/cassandra/pages/managing/operating/index.adoc @@ -14,7 +14,8 @@ * xref:cassandra:managing/operating/metrics.adoc[Monitoring metrics] * xref:cassandra:managing/operating/repair.adoc[Repair] * xref:cassandra:managing/operating/read_repair.adoc[Read repair] +* xref:cassandra:managing/operating/auto_repair.adoc[Auto Repair] * xref:cassandra:managing/operating/security.adoc[Security] * xref:cassandra:managing/operating/topo_changes.adoc[Topology changes] * xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] -* xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] \ No newline at end of file +* xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] diff --git a/doc/modules/cassandra/pages/managing/operating/repair.adoc b/doc/modules/cassandra/pages/managing/operating/repair.adoc index 1823a6d4ef95..1d0cc977faee 100644 --- a/doc/modules/cassandra/pages/managing/operating/repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/repair.adoc @@ -29,10 +29,18 @@ for syncing up missed writes, but it doesn't protect against things like disk corruption, data loss by operator error, or bugs in Cassandra. 
For this reason, full repairs should still be run occasionally. -== Usage and Best Practices +== Automated Repair Scheduling -Since repair can result in a lot of disk and network io, it's not run -automatically by Cassandra. It is run by the operator via nodetool. +Since repair can result in a lot of disk and network io, it has +traditionally not been run automatically by Cassandra. + +In the latest version of Cassandra, a new feature called +xref:managing/operating/auto_repair.adoc[auto repair] was introduced to +allow Cassandra to run repairs automatically on a schedule. + +== Submitting Repairs Using Nodetool + +Repairs can also be run by the operator via nodetool. Incremental repair is the default and is run with the following command: @@ -63,7 +71,7 @@ nodetool repair [options] ---- -The repair command repairs token ranges only on the node being repaired; it does not repair the whole cluster. +The repair command repairs token ranges only on the node being repaired; it does not repair the whole cluster. By default, repair operates on all token ranges replicated by the node on which repair is run, causing duplicate work when running it on every node. Avoid duplicate work by using the `-pr` flag to repair only the "primary" ranges on a node. Do a full cluster repair by running the `nodetool repair -pr` command on each node in each datacenter in the cluster, until all of the nodes and datacenters are repaired. 
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index ae3a70476993..c352c48b1140 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -412,14 +412,6 @@ private static Map initializeDefaultOption options.put(RepairType.INCREMENTAL, getDefaultOptions()); options.put(RepairType.PREVIEW_REPAIRED, getDefaultOptions()); - - options.get(RepairType.FULL).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); - // Incremental repairs operate over unrepaired data and should finish quickly. Running them more frequently keeps - // the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a more frequent - // increase this interval to 24h or longer to reduce the impact of anticompaction caused by incremental repair. - options.get(RepairType.INCREMENTAL).min_repair_interval = new DurationSpec.IntSecondsBound("1h"); - options.get(RepairType.PREVIEW_REPAIRED).min_repair_interval = new DurationSpec.IntSecondsBound("24h"); - return options; } @@ -461,6 +453,7 @@ protected static Options getDefaultOptions() opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); + opts.min_repair_interval = new DurationSpec.IntSecondsBound("24h"); return opts; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index d746d81b71f6..ea0973729873 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -65,104 +65,27 @@ import 
static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; /** - * In Apache Cassandra, tuning repair ranges has four main goals: + * The default implementation of {@link IAutoRepairTokenRangeSplitter} that attempts to: *

          - *
        1. - * Create smaller, consistent repair times: Long repairs, such as those lasting 15 hours, can be problematic. - * If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact - * of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration - * becoming the main bottleneck. - *
        2. - *
        3. - * Minimize the impact on hosts: Repairs should not heavily affect the host systems. For incremental repairs, - * this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide - * partitions—can lead to issues with disk usage and higher compaction costs. - *
        4. - *
        5. - * Reduce overstreaming: The Merkle tree, which represents data within each partition and range, - * has a maximum size. If a repair covers too many partitions, the tree’s leaves represent larger data ranges. - * Even a small change in a leaf can trigger excessive data streaming, making the process inefficient. - *
        6. - *
        7. - * Reduce number of repairs: If there are many small tables, it's beneficial to batch these tables together - * under a single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when - * dealing with hundreds of tables. Running individual repairs for each table can significantly impact performance - * and efficiency. - *
        8. + *
        9. Create smaller, consistent repair times
        10. + *
        11. Minimize the impact on hosts
        12. + *
        13. Reduce overstreaming
        14. + *
        15. Reduce number of repairs
        16. *
        - * To manage these issues, the strategy involves estimating the size and number of partitions within a range and - * splitting it accordingly to bound the size of the range splits. This is established by iterating over SSTable - * index files to estimate the amount of bytes and partitions involved in the ranges being repaired and by what - * repair type is being invoked. - *

        + *

        + * To achieve these goals, this implementation inspects SSTable metadata to estimate the bytes and number of partitions + * within a range and splits it accordingly to bound the size of the token ranges used for repair assignments. + *

        + *

        + * Refer to + * Auto Repair documentation for this implementation + * for a more thorough breakdown of this implementation. + *

        + *

        * While this splitter has a lot of tuning parameters, the expectation is that the established default configuration * shall be sensible for all {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}'s. The following * configuration parameters are offered. - *

          - *
        • - * bytes_per_assigment: The target and maximum amount of bytes that should be included in a repair - * assignment. This is meant to scope the amount of work involved in a repair. For incremental repair, this - * involves the total number of bytes in all SSTables containing unrepaired data involving the ranges being - * repaired, including data that doesn't cover the range. This is to account for the amount of anticompaction - * that is expected. For all other repair types, this involves the amount of data covering the range being - * repaired. - *
        • - *
        • - * partitions_per_assignment: The maximum number of partitions that should be included in a repair - * assignment. This configuration exists to reduce excessive overstreaming by attempting to limit the number - * of partitions present in a merkle tree leaf node. - *
        • - *
        • - * max_tables_per_assignment: The maximum number of tables that can be included in a repair assignment. - * This aims to reduce the number of repairs, especially in cases where a large amount of tables exists for - * a keyspace. Note that the splitter will avoid batching tables together if they exceed the other - * configuration parameters such as bytes_per_assignment and partitions_per_assignment. - *
        • - *
        • - * max_bytes_per_schedule: The maximum number of bytes to cover an individual schedule. This serves - * as a mechanism for throttling the amount of work that can be done on each repair cycle. One may opt to - * reduce this value if the impact of repairs is causing too much load on the cluster, or increase it if - * writes outpace the amount of data being repaired. Alternatively, one may want to choose tuning down or up - * the min_repair_interval. - *
        • - *
        - * Given the impact of what each repair type accomplishes, different defaults are established per repair type. - *
          - *
        • - * full: Configured in a way that attempts to accomplish repairing all data in a schedule, with - * individual repairs targeting at most 200GiB of data and 1048576 partitions. - * max_bytes_per_schedule is set to a large value for full repair to attempt to repair all data per - * repair schedule. - *
            - *
          • bytes_per_assignment: 200GiB
          • - *
          • partitions_per_assignment: 1048576
          • - *
          • max_tables_per_assignment: 64
          • - *
          • max_bytes_per_schedule: 100TiB
          • - *
          - *
        • - *
        • - * incremental: Configured in a way that attempts to repair 50GiB of data per repair, and 100GiB per - * schedule. This attempts to throttle the amount of IR and anticompaction done per schedule after turning - * incremental on for the first time. You may want to consider increasing max_bytes_per_schedule - * more than this much data is written per min_repair_interval. - *
            - *
          • bytes_per_assignment: 50GiB
          • - *
          • partitions_per_assignment: 1048576
          • - *
          • max_tables_per_assignment: 64
          • - *
          • max_bytes_per_schedule: 100GiB
          • - *
          - *
        • - *
        • - * preview_repaired: Configured in a way that attempts to accomplish previewing all data in a schedule, - * with previews targeting at most 200GiB of data and 1048576 partitions. - *
            - *
          • bytes_per_assignment: 200GiB
          • - *
          • partitions_per_assignment: 1048576
          • - *
          • max_tables_per_assignment: 64
          • - *
          • max_bytes_per_schedule: 100TiB
          • - *
          - *
        • - *
        + *

        */ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { @@ -171,9 +94,25 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter // Default max bytes to 100TiB, which is much more readable than Long.MAX_VALUE private static final DataStorageSpec.LongBytesBound MAX_BYTES = new DataStorageSpec.LongBytesBound(100_000, DataStorageSpec.DataStorageUnit.GIBIBYTES); + /** + * The target bytes that should be included in a repair assignment + */ static final String BYTES_PER_ASSIGNMENT = "bytes_per_assignment"; + + /** + * Maximum number of partitions to include in a repair assignment + */ static final String PARTITIONS_PER_ASSIGNMENT = "partitions_per_assignment"; + + /** + * Maximum number of tables to include in a repair assignment if {@link AutoRepairConfig.Options#repair_by_keyspace} + * is enabled. + */ static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; + + /** + * The maximum number of bytes to cover in an individual schedule + */ static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; static final List PARAMETERS = Arrays.asList(BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE); @@ -193,24 +132,23 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter *

        * Defaults if not specified for the given repair type: *

      • - *
          bytes_per_assignment: 200GiB
        - *
          partitions_per_assignment: 2^repair_session_max_tree_depth
        + *
          bytes_per_assignment: 50GiB
        + *
          partitions_per_assignment: 1048576 (2^20)
        *
          max_tables_per_assignment: 64
        *
          max_bytes_per_schedule: 1000GiB
        *
      • - * It's expected that these defaults should work well for everything except incremental, so we confine - * bytes_per_assignment to 50GiB and max_bytes_per_schedule to 100GiB. This should strike a good balance - * between the amount of data that will be repaired during an initial migration to incremental repair and should - * move the entire repaired set from unrepaired to repaired at steady state, assuming not more the 100GiB of - * data is written to a node per min_repair_interval. + * It's expected that these defaults should work well for everything except incremental, where we + * max_bytes_per_schedule to 100GiB. This should strike a good balance between the amount of data that will be + * repaired during an initial migration to incremental repair and should move the entire repaired set from + * unrepaired to repaired at steady state, assuming not more the 100GiB of data is written to a node per + * min_repair_interval. */ private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); - // Restrict incremental repair to 50GB bytes per assignment to confine the amount of possible autocompaction. + // Restrict incremental repair to 100GB max bytes per schedule to confine the amount of possible autocompaction. 
put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) - .withBytesPerAssignment(new DataStorageSpec.LongBytesBound("50GiB")) .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("100GiB")) .build()); put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.PREVIEW_REPAIRED) @@ -946,8 +884,8 @@ static RepairTypeDefaultsBuilder builder(AutoRepairConfig.RepairType repairType) static class RepairTypeDefaultsBuilder { private final AutoRepairConfig.RepairType repairType; - private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("200GiB"); - // Aims to target at most 1 partitons per leaf assuming a merkle tree of depth 20 (2^20 = 1,048,576) + private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("50GiB"); + // Aims to target at most 1 partitions per leaf assuming a merkle tree of depth 20 (2^20 = 1,048,576) private long partitionsPerAssignment = 1_048_576; private int maxTablesPerAssignment = 64; private DataStorageSpec.LongBytesBound maxBytesPerSchedule = MAX_BYTES; @@ -957,6 +895,7 @@ private RepairTypeDefaultsBuilder(AutoRepairConfig.RepairType repairType) this.repairType = repairType; } + @SuppressWarnings("unused") public RepairTypeDefaultsBuilder withBytesPerAssignment(DataStorageSpec.LongBytesBound bytesPerAssignment) { this.bytesPerAssignment = bytesPerAssignment; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index e8c60f2d9b50..2c3e4401ed9f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -41,7 +41,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static 
org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; /** @@ -437,15 +436,7 @@ public void testDefaultOptions() assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), options.token_range_splitter); assertEquals(new DurationSpec.IntSecondsBound("5m"), options.initial_scheduler_delay); assertEquals(new DurationSpec.IntSecondsBound("3h"), options.repair_session_timeout); - - if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) - { - assertEquals(new DurationSpec.IntSecondsBound("1h"), options.min_repair_interval); - } - else - { - assertEquals(new DurationSpec.IntSecondsBound("24h"), options.min_repair_interval); - } + assertEquals(new DurationSpec.IntSecondsBound("24h"), options.min_repair_interval); } @Test @@ -466,6 +457,6 @@ public void testGlobalOptions() assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), config.global_settings.token_range_splitter); assertEquals(new DurationSpec.IntSecondsBound("5m"), config.global_settings.initial_scheduler_delay); assertEquals(new DurationSpec.IntSecondsBound("3h"), config.global_settings.repair_session_timeout); - assertNull(config.global_settings.min_repair_interval); + assertEquals(new DurationSpec.IntSecondsBound("24h"), config.global_settings.min_repair_interval); } } From 9acc02cce8fda0676f86c06394e65440703eeb0b Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 8 Mar 2025 01:42:01 -0600 Subject: [PATCH 232/257] Revert tombstones.adoc no op changes. 
--- .../pages/managing/operating/compaction/tombstones.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc index 48b1a9b62e75..9e0dcb6f7879 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc @@ -24,6 +24,7 @@ This approach is used instead of removing values because of the distributed natu Once an object is marked as a tombstone, queries will ignore all values that are time-stamped previous to the tombstone insertion. == Zombies + In a multi-node cluster, {cassandra} may store replicas of the same data on two or more nodes. This helps prevent data loss, but it complicates the deletion process. If a node receives a delete command for data it stores locally, the node tombstones the specified object and tries to pass the tombstone to other nodes containing replicas of that object. @@ -49,7 +50,7 @@ But if the node does not recover until after the grace period ends, {cassandra} After the tombstone's grace period ends, {cassandra} deletes the tombstone during compaction. -== Deletion +== Deletion After `gc_grace_seconds` has expired the tombstone may be removed (meaning there will no longer be any object that a certain piece of data was deleted). 
From 91ba32c8c08b69c01a125d0723c05b5479b9ab60 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sat, 8 Mar 2025 02:16:45 -0600 Subject: [PATCH 233/257] Add documentation for reject_repair_compaction_threshold --- conf/cassandra.yaml | 4 ++++ conf/cassandra_latest.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 57f053c7c07a..27b63abe0df3 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2675,6 +2675,10 @@ storage_compatibility_mode: NONE # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s +# Prevents preparing a repair session or beginning a repair streaming session if pending compactions is over +# the given value. Defaults to disabled. +# reject_repair_compaction_threshold: 1024 + # At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. # if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index e3d0c2e89d23..2b6be317fb3a 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2370,6 +2370,10 @@ default_secondary_index_enabled: true # storage_compatibility_mode: NONE +# Prevents preparing a repair session or beginning a repair streaming session if pending compactions is over +# the given value. Defaults to disabled. +# reject_repair_compaction_threshold: 1024 + # At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during # incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. 
# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) From aa383bedc3dc496b3396a9a65f436ae87f2e5436 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:06:33 -0500 Subject: [PATCH 234/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index ea0973729873..b34f1173c68a 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -147,7 +147,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter {{ put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) .build()); - // Restrict incremental repair to 100GB max bytes per schedule to confine the amount of possible autocompaction. + // Restrict incremental repair to 100GiB max bytes per schedule to confine the amount of possible autocompaction. 
put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("100GiB")) .build()); From 51b05c4b5ad5c49120cbf8ce475f946e1c8bdbb1 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:08:44 -0500 Subject: [PATCH 235/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 27b63abe0df3..92ccc3d05a2c 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2708,7 +2708,7 @@ storage_compatibility_mode: NONE # # round on all nodes in less than this duration, it will not start a new repair round on a given node until # # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce # # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than -# # gc_grace_seconds to avoid zombies. +# # gc_grace_seconds to avoid potential data resurrection. # min_repair_interval: 24h # token_range_splitter: # # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges From 775fd7b145924db71f6b3d2802a1e637e897a8b4 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:09:14 -0500 Subject: [PATCH 236/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 92ccc3d05a2c..1cba28c6d68c 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2744,7 +2744,7 @@ storage_compatibility_mode: NONE # # so a more frequent schedule such as 1h is recommended. 
# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is adviable to # # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental -# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep to unrepaired +# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired # # set small. # min_repair_interval: 24h # token_range_splitter: From f08b2cac6d45bd57e35559c975f0dbb4b9f979ca Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:10:12 -0500 Subject: [PATCH 237/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 1cba28c6d68c..bc135e894541 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2753,7 +2753,7 @@ storage_compatibility_mode: NONE # # This throttles the amount of incremental repair and anticompaction done per schedule after incremental # # repairs are turned on. # bytes_per_assignment: 50GiB -# # Resricts the maximum number of bytes to cover in an individual schedule to 100GiB. +# # Resricts the maximum number of bytes to cover in an individual schedule to the configured max_bytes_per_schedule value (defaults to 100GiB). # # Consider increasing this if more data is written than this limit within the min_repair_interval. 
# max_bytes_per_schedule: 100GiB # preview_repaired: From 3ada7beeab5d74f39881d2d872522e0c9bd8dae2 Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 11:10:24 -0500 Subject: [PATCH 238/257] Update conf/cassandra.yaml Co-authored-by: Francisco Guerrero --- conf/cassandra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index bc135e894541..935876cf90fa 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2754,7 +2754,7 @@ storage_compatibility_mode: NONE # # repairs are turned on. # bytes_per_assignment: 50GiB # # Resricts the maximum number of bytes to cover in an individual schedule to the configured max_bytes_per_schedule value (defaults to 100GiB). -# # Consider increasing this if more data is written than this limit within the min_repair_interval. +# # Consider increasing this value if more data is written than this limit within the min_repair_interval. 
# max_bytes_per_schedule: 100GiB # preview_repaired: # # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired From 706b390137eaafce12e0f26d7529216553431d6a Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 12:27:17 -0500 Subject: [PATCH 239/257] Doc and conf fixes, initial IR enablement guidance --- conf/cassandra.yaml | 18 ++--- conf/cassandra_latest.yaml | 21 +++--- .../pages/managing/operating/auto_repair.adoc | 71 ++++++++++++++----- .../pages/managing/operating/repair.adoc | 5 +- 4 files changed, 76 insertions(+), 39 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 935876cf90fa..f8574dad806c 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2726,9 +2726,9 @@ storage_compatibility_mode: NONE # # parameters: # # param_key1: param_value1 # parameters: -# # The target and maximum amount of bytes that should be included in a repair -# # assignment. This scopes the amount of work involved in a repair and includes -# # the data covering the range being repaired. +# # The target and maximum amount of compressed bytes that should be included in a repair assignment. +# # This scopes the amount of work involved in a repair and includes the data covering the range being +# # repaired. # bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this @@ -2742,18 +2742,18 @@ storage_compatibility_mode: NONE # # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair # # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, # # so a more frequent schedule such as 1h is recommended. 
-# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is adviable to -# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental -# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep the unrepaired -# # set small. +# # NOTE: Please consult +# # https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html#enabling-ir +# # for guidance on enabling incremental repair on an existing cluster. # min_repair_interval: 24h # token_range_splitter: # parameters: -# # Configured to attempt repairing 50GiB of data per repair. +# # Configured to attempt repairing 50GiB of compressed data per repair. # # This throttles the amount of incremental repair and anticompaction done per schedule after incremental # # repairs are turned on. # bytes_per_assignment: 50GiB -# # Resricts the maximum number of bytes to cover in an individual schedule to the configured max_bytes_per_schedule value (defaults to 100GiB). +# # Restricts the maximum number of bytes to cover in an individual schedule to the configured +# # max_bytes_per_schedule value (defaults to 100GiB for incremental). # # Consider increasing this value if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 2b6be317fb3a..5f754a997565 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2403,7 +2403,7 @@ storage_compatibility_mode: NONE # # round on all nodes in less than this duration, it will not start a new repair round on a given node until # # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce # # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than -# # gc_grace_seconds to avoid zombies. 
+# # gc_grace_seconds to avoid potential data resurrection. # min_repair_interval: 24h # token_range_splitter: # # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges @@ -2421,9 +2421,9 @@ storage_compatibility_mode: NONE # # parameters: # # param_key1: param_value1 # parameters: -# # The target and maximum amount of bytes that should be included in a repair -# # assignment. This scopes the amount of work involved in a repair and includes -# # the data covering the range being repaired. +# # The target and maximum amount of compressed bytes that should be included in a repair assignment. +# # This scopes the amount of work involved in a repair and includes the data covering the range being +# # repaired. # bytes_per_assignment: 50GiB # # The maximum number of bytes to cover in an individual schedule. This serves as # # a mechanism to throttle the work done in each repair cycle. You may reduce this @@ -2438,19 +2438,16 @@ storage_compatibility_mode: NONE # # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair # # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, # # so a more frequent schedule such as 1h is recommended. -# # NOTE: When turning on incremental repair for the first time with a decent amount of data it is adviable to -# # use a larger interval such as 24h or longer to reduce the impact of anticompaction caused by incremental -# # repair. Once a majority of data has been repaired, one may consider reducing this to 1h to keep to unrepaired -# # set small. -# min_repair_interval: 24h +# min_repair_interval: 1h # token_range_splitter: # parameters: -# # Configured to attempt repairing 50GiB of data per repair. +# # Configured to attempt repairing 50GiB of compressed data per repair. # # This throttles the amount of incremental repair and anticompaction done per schedule after incremental # # repairs are turned on. 
# bytes_per_assignment: 50GiB -# # Resricts the maximum number of bytes to cover in an individual schedule to 100GiB. -# # Consider increasing this if more data is written than this limit within the min_repair_interval. +# # Restricts the maximum number of bytes to cover in an individual schedule to the configured +# # max_bytes_per_schedule value (defaults to 100GiB for incremental). +# # Consider increasing this value if more data is written than this limit within the min_repair_interval. # max_bytes_per_schedule: 100GiB # preview_repaired: # # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index db50a8c4524b..e82cb95d1223 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -38,7 +38,7 @@ xref:#fixed-split-token-range-splitter[FixedSplitTokenRangeSplitter]. == Considerations -Before enabling auto repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base +Before enabling Auto Repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base understanding of repairs. === Full Repair @@ -59,11 +59,48 @@ be eventually purged. Running incremental repair more frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, so a shorter `min_repair_interval` such as `1h` is recommended for new clusters. -NOTE: When turning on incremental repair for the first time on an existing cluser with a large amount of data it is -advisable to use a larger interval such as `24h` or longer to reduce the impact of anticompaction caused by incremental -repair. Once the entire token range has been repaired, one may consider reducing this to a shorter interval. 
It is -strongly recommended to not enable incremental repair for an existing cluster unless full repair has previously -actively run. +==== Enabling Incremental Repair on existing clusters with a large amount of data +[#enabling-ir] +One should be careful when enabling incremental repair on a cluster for the first time. While +xref:#repair-token-range-splitter[RepairTokenRangeSplitter] includes a default configuration to attempt to gracefully +migrate to incremental repair over time, failure to take proper precaution could overwhelm the cluster with +xref:managing/operating/compaction/overview.adoc#types-of-compaction[anticompactions]. + +No matter how one goes about enabling and running incremental repair, it is recommended to run a cycle of full repairs +for the entire cluster as a pre-flight step to running incremental repair. This will put the cluster into a more +consistent state which will reduce the amount of streaming between replicas when incremental repair initially runs. + +If you do not have strong data consistency requirements, one may consider using +xref:managing/tools/sstable/sstablerepairedset.adoc[nodetool sstablerepairedset] to mark all SSTables as repaired +before enabling incremental repair scheduling using Auto Repair. This will reduce the burden of initially running +incremental repair because all existing data will be considered as repaired, so subsequent incremental repairs will +only run against new data. + +If you do have strong data consistency requirements, then one must treat all data as initially unrepaired and run +incremental repair against it. Consult +xref:#incremental-repair-defaults[RepairTokenRangeSplitter's Incremental repair defaults]. 
+ +In particular one should be mindful of the xref:managing/operating/compaction/overview.adoc[compaction strategy] +you use for your tables and how it might impact incremental repair before running incremental repair for the first +time: + +- *Large SSTables*: When using xref:managing/operating/compaction/stcs.adoc[SizeTieredCompactionStrategy] or any + compaction strategy which can create large SSTables including many partitions the amount of + xref:managing/operating/compaction/overview.adoc#types-of-compaction[anticompaction] that might be required could be + excessive. Using a small `bytes_per_assignment` might contribute to repeated anticompactions over the same + unrepaired data. +- *Partitions overlapping many SSTables*: If partitions overlap between many SSTables, the amount of SSTables included + in a repair might be large. Therefore it is important to consider that many SSTables may be included in a repair + session and must all be anticompacted. xref:managing/operating/compaction/lcs.adoc[LeveledCompactionStrategy] is less + susceptible to this issue as it prevents overlapping of partitions within levels outside of L0, but if SSTables + start accumulating in L0 between incremental repairs, the cost of anticompaction will increase. + xref:managing/operating/compaction/ucs#sharding[UnifiedCompactionStrategy's sharding] can also be used to avoid + partitions overlapping SSTables. + +The xref:#repair-token-range-splitter[token_range_splitter] configuration for incremental repair includes a default +configuration that attempts to conservatively migrate 100GiB of compressed data every day per node. Depending on +requirements, data set and capability of a cluster's hardware, one may consider tuning these values to be more +aggressive or conservative. 
=== Previewing Repaired Data @@ -80,11 +117,11 @@ When enabled, the `BytesPreviewedDesynchronized` and `TokenRangesPreviewedDesync xref:cassandra:managing/operating/metrics.adoc#table-metrics[table metrics] can be used to detect inconsistencies in the repaired data set. -== Configuring auto repair in cassandra.yaml +== Configuring Auto Repair in cassandra.yaml -Configuration for auto repair is managed in the `cassandra.yaml` file by the `auto_repair` property. +Configuration for Auto Repair is managed in the `cassandra.yaml` file by the `auto_repair` property. -A rich set of configuration exists for configuring auto repair with sensible defaults. However, the expectation +A rich set of configuration exists for configuring Auto Repair with sensible defaults. However, the expectation is that some tuning might be needed particulary when it comes to tuning how often repair should run (`min_repair_interval`) and how repair assignments as created (`token_range_splitter`). @@ -148,9 +185,9 @@ type by using `repair_type_overrides`. such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one round on all nodes in less than this duration, it will not start a new repair round on a given node until this much time has passed since the last repair completed. Consider increasing to a larger value to reduce the impact of repairs, -however *note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to -avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[zombies]*. -| class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of +however note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to +avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[data resurrection]. 
+| token_range_splitter.class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of IAutoRepairTokenRangeSplitter to use; responsible for splitting token ranges for repair assignments. Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter,FixedTokenRangeSplitter}. | repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, @@ -183,20 +220,20 @@ in nodetool repair. General advice is to keep this true. `RepairTokenRangeSplitter` is the default implementation of `IAutoRepairTokenRangeSplitter` that attempts to create token range assignments meeting the following goals: -1. *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic. If a +- *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic. If a node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact of disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration becoming the main bottleneck. -2. *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this +- *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide partitions can lead to issues with disk usage and higher compaction costs. -3. *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. +- *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. If a repair covers too many partitions, the tree’s leaves represent larger data ranges. 
Even a small change in a leaf can trigger excessive data streaming, making the process inefficient. -4. *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a +- *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when dealing with hundreds of tables. Running individual repairs for each table can significantly impact performance and efficiency. @@ -281,7 +318,7 @@ conservative this top level configuration could be increased to a larger value t == Table configuration -If auto repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table +If Auto Repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table level, e.g.: [source,cql] diff --git a/doc/modules/cassandra/pages/managing/operating/repair.adoc b/doc/modules/cassandra/pages/managing/operating/repair.adoc index 1d0cc977faee..d7eaba171125 100644 --- a/doc/modules/cassandra/pages/managing/operating/repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/repair.adoc @@ -36,7 +36,10 @@ traditionally not been run automatically by Cassandra. In the latest version of Cassandra, a new feature called xref:managing/operating/auto_repair.adoc[auto repair] was introduced to -allow Cassandra to run repairs automatically on a schedule. +allow Cassandra to submit and manage repairs automatically on a schedule. + +The introduction of this feature does not interfere with existing repair +functionality enabled via nodetool. 
== Submitting Repairs Using Nodetool From b809c4a982064818e2b0d55e50ea2c9ae2349bbf Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 13:22:35 -0500 Subject: [PATCH 240/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index b34f1173c68a..ace01ff514e2 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -106,7 +106,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter /** * Maximum number of tables to include in a repair assignment if {@link AutoRepairConfig.Options#repair_by_keyspace} - * is enabled. 
+ * is enabled */ static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; From 5c58c2efe0532e1f0fbb2a6abf85fc24c717985d Mon Sep 17 00:00:00 2001 From: Andrew Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Sun, 9 Mar 2025 13:22:51 -0500 Subject: [PATCH 241/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index ace01ff514e2..a743df946e36 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -137,7 +137,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter *
          max_tables_per_assignment: 64
        *
          max_bytes_per_schedule: 1000GiB
        * - * It's expected that these defaults should work well for everything except incremental, where we + * It's expected that these defaults should work well for everything except incremental, where we set * max_bytes_per_schedule to 100GiB. This should strike a good balance between the amount of data that will be * repaired during an initial migration to incremental repair and should move the entire repaired set from * unrepaired to repaired at steady state, assuming not more the 100GiB of data is written to a node per From 45edb0454ec3ec83bb7d5094e182170a58b014d1 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Mon, 10 Mar 2025 17:25:12 -0500 Subject: [PATCH 242/257] Update num_of_subranges --- .../cassandra/pages/managing/operating/auto_repair.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index e82cb95d1223..c456536d905c 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -297,9 +297,9 @@ The following `parameters` apply for `FixedSplitTokenRangeSplitter` configuratio [cols=",,",options="header",] |=== | Name | Default | Description -| number_of_subranges | 64 | Number of evenly split subranges to create for each node that repair runs for. +| number_of_subranges | 32 | Number of evenly split subranges to create for each node that repair runs for. If vnodes are configured using `num_tokens`, attempts to evenly subdivide subranges by each range. For example, for -`num_tokens: 16` and `number_of_subranges: 64`, 4 (64/16) repair assignments will be created for each token range. At +`num_tokens: 16` and `number_of_subranges: 32`, 2 (32/16) repair assignments will be created for each token range. At least one repair assignment will be created for each token range. 
|=== From 317dfbb31cce1a3c75493e9b79183a06929a1511 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Tue, 11 Mar 2025 17:06:07 -0700 Subject: [PATCH 243/257] Move auto-repair retry policy config to repair-type level --- .../repair/autorepair/AutoRepair.java | 8 ++-- .../repair/autorepair/AutoRepairConfig.java | 48 +++++++++---------- .../cassandra/service/AutoRepairService.java | 28 +++++------ .../service/AutoRepairServiceMBean.java | 7 ++- .../org/apache/cassandra/tools/NodeProbe.java | 20 ++++---- .../tools/nodetool/SetAutoRepairConfig.java | 12 ++--- .../AutoRepairParameterizedTest.java | 12 ++--- .../service/AutoRepairServiceBasicTest.java | 8 ++-- .../nodetool/SetAutoRepairConfigTest.java | 24 ++-------- 9 files changed, 74 insertions(+), 93 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 07fb4793c51e..7238d14acdf6 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -313,7 +313,7 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim boolean success = false; int retryCount = 0; Future f = null; - while (retryCount <= config.getRepairMaxRetries()) + while (retryCount <= config.getRepairMaxRetries(repairType)) { RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, Lists.newArrayList(curRepairAssignment.getTableNames()), @@ -336,14 +336,14 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim { break; } - else if (retryCount < config.getRepairMaxRetries()) + else if (retryCount < config.getRepairMaxRetries(repairType)) { boolean cancellationStatus = f.cancel(true); logger.warn("Repair failed for range {}-{} for {} tables {} with cancellationStatus: {} retrying after {} seconds...", tokenRange.left, tokenRange.right, keyspaceName, curRepairAssignment.getTableNames(), - 
cancellationStatus, config.getRepairRetryBackoff().toSeconds()); - sleepFunc.accept(config.getRepairRetryBackoff().toSeconds(), TimeUnit.SECONDS); + cancellationStatus, config.getRepairRetryBackoff(repairType).toSeconds()); + sleepFunc.accept(config.getRepairRetryBackoff(repairType).toSeconds(), TimeUnit.SECONDS); } retryCount++; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index c352c48b1140..a23cd3021232 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -56,10 +56,6 @@ public class AutoRepairConfig implements Serializable // The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata // for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); - // Maximum number of retries for a repair session. - public volatile Integer repair_max_retries = 3; - // Backoff time before retrying a repair session. - public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); // Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming // the node by scheduling too many repair tasks in a short period of time. 
public volatile DurationSpec.LongSecondsBound repair_task_min_duration = new DurationSpec.LongSecondsBound("5s"); @@ -169,26 +165,6 @@ public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); } - public int getRepairMaxRetries() - { - return repair_max_retries; - } - - public void setRepairMaxRetries(int maxRetries) - { - repair_max_retries = maxRetries; - } - - public DurationSpec.LongSecondsBound getRepairRetryBackoff() - { - return repair_retry_backoff; - } - - public void setRepairRetryBackoff(String interval) - { - repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); - } - public DurationSpec.LongSecondsBound getRepairTaskMinDuration() { return repair_task_min_duration; @@ -360,6 +336,26 @@ public void setRepairSessionTimeout(RepairType repairType, String repairSessionT getOptions(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); } + public int getRepairMaxRetries(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_max_retries); + } + + public void setRepairMaxRetries(RepairType repairType, int maxRetries) + { + getOptions(repairType).repair_max_retries = maxRetries; + } + + public DurationSpec.LongSecondsBound getRepairRetryBackoff(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_retry_backoff); + } + + public void setRepairRetryBackoff(RepairType repairType, String interval) + { + getOptions(repairType).repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); + } + @VisibleForTesting static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException { @@ -505,6 +501,10 @@ protected static Options getDefaultOptions() public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; // Timeout for resuming stuck repair 
sessions. public volatile DurationSpec.IntSecondsBound repair_session_timeout; + // Maximum number of retries for a repair session. + public volatile Integer repair_max_retries = 3; + // Backoff time before retrying a repair session. + public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); public String toString() { diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 0cf744296e07..9f66a92abc95 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -100,8 +100,6 @@ public String getAutoRepairConfiguration() StringBuilder sb = new StringBuilder(); sb.append("repair scheduler configuration:"); appendConfig(sb, "repair_check_interval", config.getRepairCheckInterval()); - appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries()); - appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff()); appendConfig(sb, "repair_task_min_duration", config.getRepairTaskMinDuration()); appendConfig(sb, "history_clear_delete_hosts_buffer_interval", config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); for (RepairType repairType : RepairType.values()) @@ -162,18 +160,6 @@ public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); } - @Override - public void setAutoRepairMaxRetriesCount(int retries) - { - config.setRepairMaxRetries(retries); - } - - @Override - public void setAutoRepairRetryBackoff(String interval) - { - config.setRepairRetryBackoff(interval); - } - @Override public void setAutoRepairMinRepairTaskDuration(String duration) { @@ -261,6 +247,18 @@ public void setRepairByKeyspace(String repairType, boolean repairByKeyspace) config.setRepairByKeyspace(RepairType.parse(repairType), repairByKeyspace); } + @Override + public 
void setAutoRepairMaxRetriesCount(String repairType, int retries) + { + config.setRepairMaxRetries(RepairType.parse(repairType), retries); + } + + @Override + public void setAutoRepairRetryBackoff(String repairType, String interval) + { + config.setRepairRetryBackoff(RepairType.parse(repairType), interval); + } + private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig config) { StringBuilder sb = new StringBuilder(); @@ -288,6 +286,8 @@ private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig co appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); + appendConfig(sb , "repair_max_retries", config.getRepairMaxRetries(repairType)); + appendConfig(sb , "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); final String splitterClassName = splitterClass.class_name != null ? 
splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index d57329e86795..72d9da7c5a0c 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -39,10 +39,6 @@ public interface AutoRepairServiceMBean public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); - public void setAutoRepairMaxRetriesCount(int retries); - - public void setAutoRepairRetryBackoff(String interval); - public void setAutoRepairMinRepairTaskDuration(String duration); public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold); @@ -71,4 +67,7 @@ public interface AutoRepairServiceMBean public void setRepairByKeyspace(String repairType, boolean repairByKeyspace); + public void setAutoRepairMaxRetriesCount(String repairType, int retries); + + public void setAutoRepairRetryBackoff(String repairType, String interval); } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 723894caa093..bf93bd179d4d 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2610,16 +2610,6 @@ public void startAutoRepairScheduler() autoRepairProxy.startScheduler(); } - public void setAutoRepairMaxRetriesCount(int retries) - { - autoRepairProxy.setAutoRepairMaxRetriesCount(retries); - } - - public void setAutoRepairRetryBackoff(String interval) - { - autoRepairProxy.setAutoRepairRetryBackoff(interval); - } - public void setAutoRepairMinRepairTaskDuration(String duration) { autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); @@ -2684,6 +2674,16 @@ public void setAutoRepairRepairByKeyspace(String repairType, boolean enabled) { 
autoRepairProxy.setRepairByKeyspace(repairType, enabled); } + + public void setAutoRepairMaxRetriesCount(String repairType, int retries) + { + autoRepairProxy.setAutoRepairMaxRetriesCount(repairType, retries); + } + + public void setAutoRepairRetryBackoff(String repairType, String interval) + { + autoRepairProxy.setAutoRepairRetryBackoff(repairType, interval); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 37f809943fb6..5a9a1411bb59 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -87,12 +87,6 @@ public void execute(NodeProbe probe) case "history_clear_delete_hosts_buffer_interval": probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); return; - case "repair_max_retries": - probe.setAutoRepairMaxRetriesCount(Integer.parseInt(paramVal)); - return; - case "repair_retry_backoff": - probe.setAutoRepairRetryBackoff(paramVal); - return; case "min_repair_task_duration": probe.setAutoRepairMinRepairTaskDuration(paramVal); return; @@ -163,6 +157,12 @@ public void execute(NodeProbe probe) case "repair_by_keyspace": probe.setAutoRepairRepairByKeyspace(repairTypeStr, Boolean.parseBoolean(paramVal)); break; + case "repair_max_retries": + probe.setAutoRepairMaxRetriesCount(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "repair_retry_backoff": + probe.setAutoRepairRetryBackoff(repairTypeStr, paramVal); + break; default: throw new IllegalArgumentException("Unknown parameter: " + paramType); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index faa70e3d451f..1cc80791b4e6 100644 --- 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -489,7 +489,7 @@ public void testMetrics() AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); config.setMaterializedViewRepairEnabled(repairType, true); config.setRepairMinInterval(repairType, "0s"); - config.setRepairRetryBackoff("0s"); + config.setRepairRetryBackoff(repairType, "0s"); config.setAutoRepairTableMaxRepairTime(repairType, "0s"); AutoRepair.timeFunc = () -> { timeFuncCalls++; @@ -653,7 +653,7 @@ public void testRepairMaxRetries() AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); - assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); + assertEquals(config.getRepairRetryBackoff(repairType).toSeconds(), (long) duration); }; config.setRepairMinInterval(repairType, "0s"); AutoRepair.instance.repairStates.put(repairType, autoRepairState); @@ -661,7 +661,7 @@ public void testRepairMaxRetries() AutoRepair.instance.repair(repairType); // Expect configured retries for each keyspace expected to be repaired - assertEquals(config.getRepairMaxRetries()*expectedRepairAssignments, sleepCalls.get()); + assertEquals(config.getRepairMaxRetries(repairType)*expectedRepairAssignments, sleepCalls.get()); verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); verify(autoRepairState, times(1)).setFailedTokenRangesCount(expectedRepairAssignments); @@ -677,7 +677,7 @@ public void testRepairSuccessAfterRetry() AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { sleepCalls.getAndIncrement(); assertEquals(TimeUnit.SECONDS, unit); - assertEquals(config.getRepairRetryBackoff().toSeconds(), (long) duration); + assertEquals(config.getRepairRetryBackoff(repairType).toSeconds(), (long) duration); }; 
doAnswer(invocation -> { if (sleepCalls.get() == 0) @@ -692,7 +692,7 @@ public void testRepairSuccessAfterRetry() return null; }).when(repairRunnable).addProgressListener(Mockito.any()); config.setRepairMinInterval(repairType, "0s"); - config.setRepairMaxRetries(1); + config.setRepairMaxRetries(repairType, 1); AutoRepair.instance.repairStates.put(repairType, autoRepairState); AutoRepair.instance.repair(repairType); @@ -866,7 +866,7 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() return runnable; }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); when(spyState.getLastRepairTime()).thenReturn((long) 0); - AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(0); + AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(repairType, 0); AutoRepair.instance.repairStates.put(repairType, spyState); AutoRepair.instance.repair(repairType); diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java index bd6fcd608ba3..07b8bcc69ec3 100644 --- a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -75,17 +75,17 @@ public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() @Test public void testsetAutoRepairMaxRetriesCount() { - autoRepairService.setAutoRepairMaxRetriesCount(101); + autoRepairService.setAutoRepairMaxRetriesCount(AutoRepairConfig.RepairType.INCREMENTAL.name(), 101); - assertEquals(101, config.getRepairMaxRetries()); + assertEquals(101, config.getRepairMaxRetries(AutoRepairConfig.RepairType.INCREMENTAL)); } @Test public void testsetAutoRepairRetryBackoffInSec() { - autoRepairService.setAutoRepairRetryBackoff("102s"); + autoRepairService.setAutoRepairRetryBackoff(AutoRepairConfig.RepairType.INCREMENTAL.name(), "102s"); - assertEquals(102, 
config.getRepairRetryBackoff().toSeconds()); + assertEquals(102, config.getRepairRetryBackoff(AutoRepairConfig.RepairType.INCREMENTAL).toSeconds()); } @Test(expected = ConfigurationException.class) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java index 5a32c8285d48..4ea9516e8ef8 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -95,26 +95,6 @@ public void testHistoryDeleteHostsClearBufferInSec() verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); } - @Test - public void testRepairMaxRetries() - { - cmd.args = ImmutableList.of("repair_max_retries", "2"); - - cmd.execute(probe); - - verify(probe, times(1)).setAutoRepairMaxRetriesCount(2); - } - - @Test - public void testRetryBackoffInSec() - { - cmd.args = ImmutableList.of("repair_retry_backoff", "3s"); - - cmd.execute(probe); - - verify(probe, times(1)).setAutoRepairRetryBackoff("3s"); - } - @Test public void testStartScheduler() { @@ -286,7 +266,9 @@ public static Collection testCases() forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairPercentage(type.name(), 7)), forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairMaterializedViewRepairEnabled(type.name(), true)), forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type.name(), ImmutableSet.of("dc1", "dc2"))), - forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")) + forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, 
times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")), + forEachRepairType("repair_max_retries", "3", (type) -> verify(probe, times(1)).setAutoRepairMaxRetriesCount(type.name(), 3)), + forEachRepairType("repair_retry_backoff", "60s", (type) -> verify(probe, times(1)).setAutoRepairRetryBackoff(type.name(), "60s")) ).flatMap(Function.identity()).collect(Collectors.toList()); } From 1af049aab827d8b99f1913bdc222d9c681e48372 Mon Sep 17 00:00:00 2001 From: Kristijonas Zalys Date: Wed, 12 Mar 2025 20:37:50 -0700 Subject: [PATCH 244/257] Update docs --- conf/cassandra.yaml | 8 ++++---- conf/cassandra_latest.yaml | 8 ++++---- .../pages/managing/operating/auto_repair.adoc | 12 +++++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index f8574dad806c..b40be44daaec 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2768,10 +2768,6 @@ storage_compatibility_mode: NONE # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. # repair_check_interval: 5m -# # Maximum number of retries for a repair session. -# repair_max_retries: 3 -# # Backoff time before retrying a repair session. -# repair_retry_backoff: 30s # # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming # # the node by scheduling too many repair tasks in a short period of time. # repair_task_min_duration: 5s @@ -2813,6 +2809,10 @@ storage_compatibility_mode: NONE # # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults # # to true. General advice is to keep this true. # repair_primary_token_range_only: true +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. 
+# repair_retry_backoff: 30s # token_range_splitter: # # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 5f754a997565..c37ec27c75bf 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2461,10 +2461,6 @@ storage_compatibility_mode: NONE # # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule # # repairs. # repair_check_interval: 5m -# # Maximum number of retries for a repair session. -# repair_max_retries: 3 -# # Backoff time before retrying a repair session. -# repair_retry_backoff: 30s # # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming # # the node by scheduling too many repair tasks in a short period of time. # repair_task_min_duration: 5s @@ -2506,6 +2502,10 @@ storage_compatibility_mode: NONE # # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults # # to true. General advice is to keep this true. # repair_primary_token_range_only: true +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. +# repair_retry_backoff: 30s # token_range_splitter: # # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. 
# class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index c456536d905c..a1bf57acf158 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -164,9 +164,6 @@ for each repair type (full, incremental, and preview_repaired), and based on tha | repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule repairs. | repair_max_retries | 3 | Maximum number of retries for a repair session. -| repair_retry_backoff | 30s | Backoff time before retrying a repair session. -| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the -scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. | history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. @@ -212,6 +209,9 @@ centers. Useful if you have keyspaces that are not replicated in certain data ce schedule in certain data centers. | repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. General advice is to keep this true. +| repair_retry_backoff | 30s | Backoff time before retrying a repair session. +| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the +scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. 
|=== === `RepairTokenRangeSplitter` configuration @@ -352,8 +352,6 @@ $> nodetool getautorepairconfig repair scheduler configuration: repair_check_interval: 5m repair_max_retries: 3 - repair_retry_backoff: 30s - repair_task_min_duration: 5s history_clear_delete_hosts_buffer_interval: 2h configuration for repair_type: full enabled: true @@ -370,6 +368,8 @@ configuration for repair_type: full initial_scheduler_delay: 5m repair_session_timeout: 3h force_repair_new_node: false + repair_retry_backoff: 30s + repair_task_min_duration: 5s token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter token_range_splitter.bytes_per_assignment: 50GiB token_range_splitter.partitions_per_assignment: 1048576 @@ -390,6 +390,8 @@ configuration for repair_type: incremental initial_scheduler_delay: 5m repair_session_timeout: 3h force_repair_new_node: false + repair_retry_backoff: 30s + repair_task_min_duration: 5s token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter token_range_splitter.bytes_per_assignment: 50GiB token_range_splitter.partitions_per_assignment: 1048576 From c56f326565a65ac20e3147988a45972a424f59aa Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Wed, 12 Mar 2025 20:34:43 -0500 Subject: [PATCH 245/257] Avoid returning MY_TURN when replicas are actively taking their turn Previously AutoRepairUtils.myTurnToRunRepair would return MY_TURN if there is capacity to select a new node for repair and the current node has the oldest lastRepairFinishTime. This change adjusts this method to make use of a newly added method getMostEligibleHostToRepair which will identify the node with the oldest lastRepairFinishTime of which none of the node's replicas are actively repairing. This should prevent replicas from actively repairing concurrently, which would increase load on replica sets and also create anticompaction conflicts for incremental repair. 
Behavior is configurable by allow_parallel_replica_repair and allow_parallel_replica_repair_across_schedules configuration. patch by Andy Tolbert, reviewed by Jaydeepkumar Chovatia and Francisco Guerrero for CASSANDRA-20180 --- conf/cassandra.yaml | 9 + conf/cassandra_latest.yaml | 9 + .../pages/managing/operating/auto_repair.adoc | 7 + .../pages/managing/operating/metrics.adoc | 11 +- .../cassandra/metrics/AutoRepairMetrics.java | 39 ++ .../repair/autorepair/AutoRepair.java | 22 +- .../repair/autorepair/AutoRepairConfig.java | 33 ++ .../repair/autorepair/AutoRepairUtils.java | 388 ++++++++++++++++-- .../cassandra/service/AutoRepairService.java | 46 ++- .../service/AutoRepairServiceMBean.java | 4 + .../cassandra/service/StorageService.java | 8 +- .../org/apache/cassandra/tools/NodeProbe.java | 10 + .../tools/nodetool/SetAutoRepairConfig.java | 10 +- ...allelReplicaRepairAcrossSchedulesTest.java | 129 ++++++ .../test/repair/AutoRepairSchedulerTest.java | 113 +++-- .../autorepair/AutoRepairConfigTest.java | 69 +++- .../autorepair/AutoRepairMetricsTest.java | 92 +++++ .../AutoRepairParameterizedTest.java | 2 + .../autorepair/AutoRepairUtilsTest.java | 3 +- 19 files changed, 901 insertions(+), 103 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index b40be44daaec..00b5e6e38ad1 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2787,6 +2787,15 @@ storage_compatibility_mode: NONE # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value # # is used. # parallel_repair_percentage: 3 +# # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. 
+# # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction +# # conflicts while running incremental repair. +# allow_parallel_replica_repair: false +# # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) +# # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will +# # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when +# # allow_parallel_replica_repair is false. +# allow_parallel_replica_repair_across_schedules: true # # Repairs materialized views if true. # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index c37ec27c75bf..befaa7ece68e 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2480,6 +2480,15 @@ storage_compatibility_mode: NONE # # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value # # is used. # parallel_repair_percentage: 3 +# # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. +# # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction +# # conflicts while running incremental repair. +# allow_parallel_replica_repair: false +# # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) +# # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will +# # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when +# # allow_parallel_replica_repair is false. 
+# allow_parallel_replica_repair_across_schedules: true # # Repairs materialized views if true. # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index a1bf57acf158..06c0db456753 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -195,6 +195,13 @@ the -j option in nodetool repair. larger value is used. | parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If `parallel_repair_count is set`, the larger value is used. +| allow_parallel_replica_repair | false | Whether to allow a node to take its turn running repair while one or more of +its replicas are running repair. Defaults to false, as running repairs concurrently on replicas can increase load and +also cause anticompaction conflicts while running incremental repair. +| allow_parallel_replica_repair_across_schedules | true | An addition to allow_parallel_replica_repair that also blocks repairs +when replicas (including this node itself) are repairing in any schedule. +For example, if a replica is executing full repairs, a value of false will prevent starting incremental repairs for this +node. Defaults to true and is only evaluated when allow_parallel_replica_repair is false. | materialized_view_repair_enabled | false | Repairs materialized views if true. | initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. 
diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index eb9ce8c8d83f..7294a3675e82 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1114,6 +1114,9 @@ the entire Cassandra cluster in seconds |LongestUnrepairedSec |Gauge |Time since the last repair ran on the node in seconds +|RepairStartLagSec|Gauge |If a repair has not run within min_repair_interval, how long past this value since +repairs last completed. Useful for determining if repairs are behind schedule. + |SucceededTokenRangesCount |Gauge |Number of token ranges successfully repaired on the node |FailedTokenRangesCount |Gauge |Number of token ranges failed to repair on the node @@ -1135,9 +1138,13 @@ the automated repair has been disabled on the node |RepairTurnMyTurnDueToPriority |Counter |Represents the node's turn to repair due to priority set in the automated repair -|RepairTurnMyTurnForceRepair |Counter |Represents the node's turn to repair -due to force repair set in the automated repair +|RepairDelayedByReplica |Counter |Represents occurrences of a node's turn being +delayed because a replica was currently taking its turn. Only relevant if +`allow_parallel_replica_repair` is false. +|RepairDelayedBySchedule |Counter |Represents occurrences of a node's turn being +delayed because it was already being repaired in another schedule. Only relevant +if `allow_parallel_replica_repair_across_schedules` is false. 
|=== diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java index ee8a0abfea70..3ef24a9eec1b 100644 --- a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -23,7 +23,9 @@ import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.service.AutoRepairService; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; @@ -37,6 +39,7 @@ public class AutoRepairMetrics public final Gauge nodeRepairTimeInSec; public final Gauge clusterRepairTimeInSec; public final Gauge longestUnrepairedSec; + public final Gauge repairStartLagSec; public final Gauge succeededTokenRangesCount; public final Gauge failedTokenRangesCount; public final Gauge skippedTokenRangesCount; @@ -46,8 +49,16 @@ public class AutoRepairMetrics public Counter repairTurnMyTurn; public Counter repairTurnMyTurnDueToPriority; public Counter repairTurnMyTurnForceRepair; + public Counter repairDelayedByReplica; + public Counter repairDelayedBySchedule; + + private final RepairType repairType; + + private volatile int repairStartLagSecVal; + public AutoRepairMetrics(RepairType repairType) { + this.repairType = repairType; AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); repairsInProgress = Metrics.register(factory.createMetricName("RepairsInProgress"), new Gauge() @@ -98,6 +109,14 @@ public Integer getValue() } }); + repairStartLagSec = Metrics.register(factory.createMetricName("RepairStartLagSec"), new Gauge() + { + public Integer getValue() + { + return repairStartLagSecVal; + } + }); + 
succeededTokenRangesCount = Metrics.register(factory.createMetricName("SucceededTokenRangesCount"), new Gauge() { public Integer getValue() @@ -118,6 +137,9 @@ public Integer getValue() repairTurnMyTurnDueToPriority = Metrics.counter(factory.createMetricName("RepairTurnMyTurnDueToPriority")); repairTurnMyTurnForceRepair = Metrics.counter(factory.createMetricName("RepairTurnMyTurnForceRepair")); + repairDelayedByReplica = Metrics.counter(factory.createMetricName("RepairDelayedByReplica")); + repairDelayedBySchedule = Metrics.counter(factory.createMetricName("RepairDelayedBySchedule")); + totalMVTablesConsideredForRepair = Metrics.register(factory.createMetricName("TotalMVTablesConsideredForRepair"), new Gauge() { public Integer getValue() @@ -151,6 +173,23 @@ public void recordTurn(AutoRepairUtils.RepairTurn turn) default: throw new RuntimeException(String.format("Unrecoginized turn: %s", turn.name())); } + this.repairStartLagSecVal = 0; + } + + /** + * Record perceived lag in scheduling repair. + *

        + * Takes the current time and subtracts the given last repair finish time from it. It then compares the difference + * with the min repair interval for this repair type, and if that value is greater than 0, records it. + */ + public void recordRepairStartLag(long lastFinishTimeInMs) + { + long now = AutoRepair.instance.currentTimeMs(); + long deltaFinish = now - lastFinishTimeInMs; + long deltaMinRepairInterval = deltaFinish - AutoRepairService.instance + .getAutoRepairConfig().getRepairMinInterval(repairType) + .toMilliseconds(); + this.repairStartLagSecVal = deltaMinRepairInterval > 0 ? (int) MILLISECONDS.toSeconds(deltaMinRepairInterval) : 0; } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 7238d14acdf6..51704ce2647b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -154,6 +154,14 @@ public void repairAsync(AutoRepairConfig.RepairType repairType) repairExecutors.get(repairType).submit(() -> repair(repairType)); } + /** + * @return The current observed system time in ms. + */ + public long currentTimeMs() + { + return timeFunc.get(); + } + // repair runs a repair session of the given type synchronously. public void repair(AutoRepairConfig.RepairType repairType) { @@ -179,6 +187,13 @@ public void repair(AutoRepairConfig.RepairType repairType) //consistency level to use for local query UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + + // If it's too soon to run repair, don't bother checking if it's our turn. 
+ if (tooSoonToRunRepair(repairType, repairState, config, myId)) + { + return; + } + RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, myId); if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == MY_TURN_FORCE_REPAIR) { @@ -189,10 +204,6 @@ public void repair(AutoRepairConfig.RepairType repairType) // When doing force repair, we want to repair without -pr. boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) && turn != MY_TURN_FORCE_REPAIR; - if (tooSoonToRunRepair(repairType, repairState, config, myId)) - { - return; - } long startTime = timeFunc.get(); logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, @@ -387,7 +398,8 @@ private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoR // we should check for the node's repair history in the DB repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType, myId)); } - /** check if it is too soon to run repair. one of the reason we + /* + * check if it is too soon to run repair. 
one of the reason we * should not run frequent repair is that repair triggers * memtable flush */ diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index a23cd3021232..739a57e9c746 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -275,6 +275,26 @@ public void setParallelRepairCount(RepairType repairType, int count) getOptions(repairType).parallel_repair_count = count; } + public boolean getAllowParallelReplicaRepair(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.allow_parallel_replica_repair); + } + + public void setAllowParallelReplicaRepair(RepairType repairType, boolean enabled) + { + getOptions(repairType).allow_parallel_replica_repair = enabled; + } + + public boolean getAllowParallelReplicaRepairAcrossSchedules(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.allow_parallel_replica_repair_across_schedules); + } + + public void setAllowParallelReplicaRepairAcrossSchedules(RepairType repairType, boolean enabled) + { + getOptions(repairType).allow_parallel_replica_repair_across_schedules = enabled; + } + public boolean getMaterializedViewRepairEnabled(RepairType repairType) { return applyOverrides(repairType, opt -> opt.materialized_view_repair_enabled); @@ -440,6 +460,8 @@ protected static Options getDefaultOptions() opts.number_of_repair_threads = 1; opts.parallel_repair_count = 3; opts.parallel_repair_percentage = 3; + opts.allow_parallel_replica_repair = false; + opts.allow_parallel_replica_repair_across_schedules = true; opts.sstable_upper_threshold = 10000; opts.ignore_dcs = new HashSet<>(); opts.repair_primary_token_range_only = true; @@ -467,6 +489,15 @@ protected static Options getDefaultOptions() // Percentage of nodes in the cluster running repair in parallel. 
If parallel_repair_count is set, the larger value // is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. public volatile Integer parallel_repair_percentage; + // Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. + // Defaults to false, as running repairs concurrently on replicas can increase load and also cause + // anticompaction conflicts while running incremental repair. + public volatile Boolean allow_parallel_replica_repair; + // An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) + // are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will + // prevent starting incremental repairs for this node. Defaults to true and is only evaluated when + // allow_parallel_replica_repair is false. + public volatile Boolean allow_parallel_replica_repair_across_schedules; // Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good // tables. 
public volatile Integer sstable_upper_threshold; @@ -514,6 +545,8 @@ public String toString() ", number_of_repair_threads=" + number_of_repair_threads + ", parallel_repair_count=" + parallel_repair_count + ", parallel_repair_percentage=" + parallel_repair_percentage + + ", allow_parallel_replica_repair=" + allow_parallel_replica_repair + + ", allow_parallel_replica_repair_across_schedules=" + allow_parallel_replica_repair_across_schedules + ", sstable_upper_threshold=" + sstable_upper_threshold + ", min_repair_interval=" + min_repair_interval + ", ignore_dcs=" + ignore_dcs + diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java index 19f0558bea64..6da487e5e06b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -21,22 +21,33 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Splitter; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsByRange; +import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.LocalStrategy; import org.slf4j.Logger; @@ -57,6 +68,9 @@ import 
org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.MetaStrategy; import org.apache.cassandra.locator.NetworkTopologyStrategy; +import org.apache.cassandra.locator.RangesAtEndpoint; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; @@ -69,6 +83,7 @@ import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.transport.Dispatcher; @@ -261,9 +276,10 @@ public static class CurrentRepairStatus public Set hostIdsWithOnGoingRepair; // hosts that is running repair public Set hostIdsWithOnGoingForceRepair; // hosts that is running repair because of force repair Set priority; + public AutoRepairHistory myRepairHistory; List historiesWithoutOnGoingRepair; // hosts that is NOT running repair - public CurrentRepairStatus(List repairHistories, Set priority) + public CurrentRepairStatus(List repairHistories, Set priority, UUID myId) { hostIdsWithOnGoingRepair = new HashSet<>(); hostIdsWithOnGoingForceRepair = new HashSet<>(); @@ -286,10 +302,19 @@ public CurrentRepairStatus(List repairHistories, Set pr { historiesWithoutOnGoingRepair.add(history); } + if (history.hostId.equals(myId)) + { + myRepairHistory = history; + } } this.priority = priority; } + public Set getAllHostsWithOngoingRepair() + { + return Sets.union(hostIdsWithOnGoingRepair, hostIdsWithOnGoingForceRepair); + } + public String toString() { return MoreObjects.toStringHelper(this). @@ -297,6 +322,7 @@ public String toString() add("hostIdsWithOnGoingForceRepair", hostIdsWithOnGoingForceRepair). 
add("historiesWithoutOnGoingRepair", historiesWithoutOnGoingRepair). add("priority", priority). + add("myRepairHistory", myRepairHistory). toString(); } } @@ -311,7 +337,7 @@ public static List getAutoRepairHistory(RepairType repairType repairHistoryResult = UntypedResultSet.create(repairStatusRows.result); List repairHistories = new ArrayList<>(); - if (repairHistoryResult.size() > 0) + if (!repairHistoryResult.isEmpty()) { for (UntypedResultSet.Row row : repairHistoryResult) { @@ -323,7 +349,7 @@ public static List getAutoRepairHistory(RepairType repairType long lastRepairFinishTime = row.getLong(COL_REPAIR_FINISH_TS, 0); Set deleteHosts = row.getSet(COL_DELETE_HOSTS, UUIDType.instance); long deleteHostsUpdateTime = row.getLong(COL_DELETE_HOSTS_UPDATE_TIME, 0); - Boolean forceRepair = row.has(COL_FORCE_REPAIR) ? row.getBoolean(COL_FORCE_REPAIR) : false; + boolean forceRepair = row.has(COL_FORCE_REPAIR) && row.getBoolean(COL_FORCE_REPAIR); repairHistories.add(new AutoRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, deleteHosts, deleteHostsUpdateTime, forceRepair)); } @@ -373,12 +399,6 @@ public static void setForceRepair(RepairType repairType, UUID hostId) logger.info("Set force repair repair type: {}, node: {}", repairType, hostId); } - public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType) - { - List autoRepairHistories = getAutoRepairHistory(repairType); - return getCurrentRepairStatus(repairType, autoRepairHistories); - } - public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) { ResultMessage.Rows rows = selectLastRepairTimeForNode.execute(QueryState.forInternalCalls(), @@ -395,13 +415,12 @@ public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) return repairTime.one().getLong(COL_REPAIR_FINISH_TS); } - public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories) + @VisibleForTesting + public static 
CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories, UUID myId) { if (autoRepairHistories != null) { - CurrentRepairStatus status = new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType)); - - return status; + return new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType), myId); } return null; } @@ -418,7 +437,8 @@ protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Co logger.info("Ignore node {} because its datacenter is {}", node, nodeDC); continue; } - /** Check if endpoint state exists in gossip or not. If it + /* + * Check if endpoint state exists in gossip or not. If it * does not then this maybe a ghost node so ignore it */ if (Gossiper.instance.isAlive(node.broadcastAddress)) @@ -447,6 +467,262 @@ public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repair return getHostWithLongestUnrepairTime(autoRepairHistories); } + /** + * Convenience method to resolve the broadcast address of a host id from {@link ClusterMetadata} + * @return broadcast address if it exists in CMS, otherwise null. + */ + @Nullable + private static InetAddressAndPort getBroadcastAddress(UUID hostId) + { + Directory directory = ClusterMetadata.current().directory; + + NodeId nodeId = directory.nodeIdFromHostId(hostId); + if (nodeId != null) + { + NodeAddresses nodeAddresses = directory.getNodeAddresses(nodeId); + if (nodeAddresses != null) + { + return nodeAddresses.broadcastAddress; + } + } + return null; + } + + /** + * @return Map of broadcast address to host id, if a broadcast address cannot be found for a host, it is + * not included in the map. 
+ */ + private static Map getBroadcastAddressToHostIdMap(Set hosts) + { + // Get a mapping of endpoint : host id + Map broadcastAddressMap = new HashMap<>(hosts.size()); + for (UUID hostId : hosts) + { + InetAddressAndPort broadcastAddress = getBroadcastAddress(hostId); + if (broadcastAddress == null) + { + logger.warn("Could not resolve broadcast address from host id {} in ClusterMetadata can't accurately " + + "determine if this node is a replica of the local node.", hostId); + } + else + { + broadcastAddressMap.put(broadcastAddress, hostId); + } + } + return broadcastAddressMap; + } + + /** + * @return Mapping of unique replication strategy to keyspaces using that strategy that we care about repairing. + */ + private static Map> getReplicationStrategies() + { + // Collect all unique replication strategies among all keyspaces. + Map> replicationStrategies = new HashMap<>(); + for (Keyspace keyspace : Keyspace.all()) + { + if (AutoRepairUtils.shouldConsiderKeyspace(keyspace)) + { + replicationStrategies.computeIfAbsent(keyspace.getReplicationStrategy(), k -> new ArrayList<>()) + .add(keyspace.getName()); + } + } + return replicationStrategies; + } + + /** + * Collects all hosts being repaired among all active repair schedules and their schedule if + * {@link AutoRepairConfig#getAllowParallelReplicaRepairAcrossSchedules(RepairType)} is false for this repairType. + * Accepts the currently evaluated repairType's schedule as an optimization to avoid grabbing its repair status an + * additional time. + * + * @param myRepairType The repair type schedule being evaluated. + * @param myRepairStatus The repair status for that repair type. + * @return All hosts among active schedules currently being repaired. 
+ */ + private static Map getHostsBeingRepaired(RepairType myRepairType, CurrentRepairStatus myRepairStatus) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + + Map hostsBeingRepaired = myRepairStatus.getAllHostsWithOngoingRepair().stream() + .collect(Collectors.toMap((h) -> h, (v) -> myRepairType)); + + // If we don't allow repairing across schedules, iterate over other enabled schedules and include hosts + // actively being repaired. + if (!config.getAllowParallelReplicaRepairAcrossSchedules(myRepairType)) + { + for (RepairType repairType : RepairType.values()) + { + if (myRepairType == repairType) + continue; + + if (config.isAutoRepairEnabled(repairType)) + { + CurrentRepairStatus repairStatus = getCurrentRepairStatus(repairType, getAutoRepairHistory(repairType), null); + if (repairStatus != null) + { + for (UUID hostId : repairStatus.getAllHostsWithOngoingRepair()) + { + hostsBeingRepaired.putIfAbsent(hostId, repairType); + } + } + } + } + } + return hostsBeingRepaired; + } + + /** + * Identifies the most eligible host to repair for nodes preceding or equal to this nodes' lastRepairFinishTime. + * The criteria for this is to find the node with the oldest last repair finish time of which none of its replicas + * are currently under repair. + * @return The most eligible host to repair or null if no candidates before and including this nodes' current repair status. + */ + @VisibleForTesting + public static AutoRepairHistory getMostEligibleHostToRepair(RepairType repairType, CurrentRepairStatus currentRepairStatus, UUID myId) + { + // 0. If this repairType allows parallel replica repair, short circuit and return the host with the longest unrepair time + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (config.getAllowParallelReplicaRepair(repairType)) + { + return getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + } + + // 1. 
Sort repair histories from oldest completed to newest + Stream finishedRepairHistories = currentRepairStatus.historiesWithoutOnGoingRepair + .stream() + .sorted(Comparator.comparingLong(h -> h.lastRepairFinishTime)); + + // 2. Optimization: Truncate repair histories after myId so we don't evaluate anything more recent, since we + // only care whether this node, or a node that finished repairing before it, is next. + final AtomicBoolean myHistoryFound = new AtomicBoolean(false); + finishedRepairHistories = finishedRepairHistories.takeWhile((history) -> { + if (myHistoryFound.get()) return false; + + myHistoryFound.set(history.hostId.equals(myId)); + return true; + }); + + // If there are any hosts with ongoing repair, filter the repair histories to not include nodes whose replicas + // have an ongoing repair. + Map hostsBeingRepairedToRepairType = getHostsBeingRepaired(repairType, currentRepairStatus); + + // 3. If I am already actively being repaired in another schedule, defer submitting repairs; if already + // repairing for this type, return node so it can take its turn. + RepairType alreadyRepairingType = hostsBeingRepairedToRepairType.get(myId); + if (alreadyRepairingType != null) + { + if (repairType != alreadyRepairingType) + { + logger.info("Deferring repair because I am already actively repairing in schedule {}", hostsBeingRepairedToRepairType.get(myId)); + AutoRepairMetricsManager.getMetrics(repairType).repairDelayedBySchedule.inc(); + return null; + } + else if (currentRepairStatus.myRepairHistory != null) + { + // if the repair type matches this repair, assume the node was restarted while repairing, return node + // so it can take its turn. + logAlreadyMyTurn(); + return currentRepairStatus.myRepairHistory; + } + } + + if (!hostsBeingRepairedToRepairType.isEmpty()) + { + // 4. Extract InetAddresses for each UUID as replicas are identified by their address. + Map hostsBeingRepaired = getBroadcastAddressToHostIdMap(hostsBeingRepairedToRepairType.keySet()); + + // 5. 
Collect unique replication strategies and group them up with their keyspaces. + Map> replicationStrategies = getReplicationStrategies(); + + // 6. Filter out repair histories who have a replica being repaired, note that this is lazy, given the stream + // is completed using findFirst, it should stop as soon as the matching criteria is met. + finishedRepairHistories = finishedRepairHistories.filter((history) -> !hasReplicaWithOngoingRepair(history, + myId, + repairType, + hostsBeingRepaired, + hostsBeingRepairedToRepairType, + replicationStrategies)); + } + + // 7. Select the first (oldest lastRepairFinishTime) repair history without replicas being repaired + return finishedRepairHistories.findFirst().orElse(null); + } + + + /** + * @return Whether the host for the given eligibleRepairHistory has any replicas in hostsBeingRepaired. + * @param eligibleHistory History of node to check + * @param myId Host id of this node, if the repair history is for this node, additional logging will take place. + * @param myRepairType repair type being evaluated + * @param hostsBeingRepaired Hosts being repaired. + * @param hostIdToRepairType mapping of hosts being repaired to the repair type its being repaired for. + * @param replicationStrategies Mapping of unique replication strategies to keyspaces having that strategy. + */ + private static boolean hasReplicaWithOngoingRepair(AutoRepairHistory eligibleHistory, + UUID myId, + RepairType myRepairType, + Map hostsBeingRepaired, + Map hostIdToRepairType, + Map> replicationStrategies) + { + // If no broadcast address found for this host id in cluster metadata, just skip it, a node should always + // see itself in cluster metadata. + InetAddressAndPort eligibleBroadcastAddress = getBroadcastAddress(eligibleHistory.hostId); + if (eligibleBroadcastAddress == null) + { + return true; + } + + // For each replication strategy, determine if host being repaired is a replica of the local node. 
+ for (Map.Entry> entry : replicationStrategies.entrySet()) + { + AbstractReplicationStrategy replicationStrategy = entry.getKey(); + EndpointsByRange endpointsByRange = replicationStrategy.getRangeAddresses(ClusterMetadata.current()); + + // get ranges of the eligible address for the given replication strategy. + RangesAtEndpoint rangesAtEndpoint = StorageService.instance.getReplicas(replicationStrategy, eligibleBroadcastAddress); + for (Replica replica : rangesAtEndpoint) + { + // get the endpoints involved in this range. + EndpointsForRange endpointsForRange = endpointsByRange.get(replica.range()); + // For each host in this range... + for (InetAddressAndPort inetAddressAndPort : endpointsForRange.endpoints()) + { + // If the address of the node in the range belongs to a host being repaired, return true. + UUID hostId = hostsBeingRepaired.get(inetAddressAndPort); + if (hostId != null) + { + // log if the repair history matches the current running node. + InetAddressAndPort myBroadcastAddress = getBroadcastAddress(myId); + if (myBroadcastAddress != null && myBroadcastAddress.equals(eligibleBroadcastAddress)) + { + logger.info("Deferring repair because replica {} ({}) with shared ranges for " + + "{} keyspace(s) (e.g. {}) is currently taking its turn for schedule {}", + hostId, inetAddressAndPort, entry.getValue().size(), entry.getValue().get(0), + hostIdToRepairType.get(hostId)); + AutoRepairMetricsManager.getMetrics(myRepairType).repairDelayedByReplica.inc(); + } + else if (logger.isDebugEnabled()) + { + logger.debug("Not considering node {} ({}) for repair as it has replica {} ({}) with " + + "shared ranges for {} keyspace(s) (e.g. {}) which is currently taking its " + + "turn for schedule {}", + eligibleHistory.hostId, eligibleBroadcastAddress, + hostId, inetAddressAndPort, entry.getValue().size(), entry.getValue().get(0), + hostIdToRepairType.get(hostId)); + + } + return true; + } + } + } + } + + // No replicas found of eligible host. 
+ return false; + } + private static AutoRepairHistory getHostWithLongestUnrepairTime(List autoRepairHistories) { if (autoRepairHistories == null) @@ -480,6 +756,12 @@ public static int getMaxNumberOfNodeRunAutoRepair(RepairType repairType, int gro return Math.max(1, value); } + private static void logAlreadyMyTurn() + { + logger.warn("This node already was considered to having an ongoing repair for this repair type, must have " + + "been restarted, taking my turn back"); + } + @VisibleForTesting public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) { @@ -493,7 +775,7 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) List autoRepairHistories = getAutoRepairHistory(repairType); Set autoRepairHistoryIds = new HashSet<>(); - // 1. Remove any node that is not part of group based on goissip info + // 1. Remove any node that is not part of group based on gossip info if (autoRepairHistories != null) { for (AutoRepairHistory nodeHistory : autoRepairHistories) @@ -501,7 +783,7 @@ public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) autoRepairHistoryIds.add(nodeHistory.hostId); // clear delete_hosts if the node's delete hosts is not growing for more than two hours AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); - if (nodeHistory.deleteHosts.size() > 0 + if (!nodeHistory.deleteHosts.isEmpty() && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( currentTimeMillis() - nodeHistory.deleteHostsUpdateTime )) @@ -538,11 +820,14 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } - //get current repair status - CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); + // get updated current repair status + CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, getAutoRepairHistory(repairType), myId); if (currentRepairStatus != null) { - 
logger.info("Latest repair status {}", currentRepairStatus); + if (logger.isDebugEnabled()) + { + logger.debug("Latest repair status {}", currentRepairStatus); + } //check if I am forced to run repair for (AutoRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) { @@ -553,6 +838,23 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) } } + // check if node was already indicated as having an ongoing repair, this may happen when a node restarts + // before finishing repairing. + if (currentRepairStatus != null && currentRepairStatus.getAllHostsWithOngoingRepair().contains(myId)) + { + logAlreadyMyTurn(); + + // use the previously chosen turn. + if (currentRepairStatus.myRepairHistory != null && currentRepairStatus.myRepairHistory.repairTurn != null) + { + return RepairTurn.valueOf(currentRepairStatus.myRepairHistory.repairTurn); + } + else + { + return MY_TURN; + } + } + int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, autoRepairHistories == null ? 
0 : autoRepairHistories.size()); logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); @@ -567,16 +869,17 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) { // try to fetch again autoRepairHistories = getAutoRepairHistory(repairType); - currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories); - if (autoRepairHistories == null || currentRepairStatus == null) + if (autoRepairHistories == null) { logger.error("No record found"); return NOT_MY_TURN; } + + currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories, myId); } UUID priorityHostId = null; - if (currentRepairStatus != null && currentRepairStatus.priority != null) + if (currentRepairStatus.priority != null) { for (UUID priorityID : currentRepairStatus.priority) { @@ -597,21 +900,37 @@ else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) if (priorityHostId != null && !myId.equals(priorityHostId)) { logger.info("Priority list is not empty and I'm not the first node in the list, not my turn." + - "First node in priority list is {}", ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(priorityHostId))); + "First node in priority list is {}", getBroadcastAddress(priorityHostId)); return NOT_MY_TURN; } + if (myId.equals(priorityHostId)) { //I have a priority for repair hence its my turn now return MY_TURN_DUE_TO_PRIORITY; } - // get the longest unrepaired node from the nodes which are not running repair - AutoRepairHistory defaultNodeToBeRepaired = getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); - //check who is next, which is helpful for debugging - logger.info("Next node to be repaired for repair type {} by default: {}", repairType, defaultNodeToBeRepaired); - if (defaultNodeToBeRepaired != null && defaultNodeToBeRepaired.hostId.equals(myId)) + + // Determine if this node is the most eligible host to repair. 
+ AutoRepairHistory nodeToBeRepaired = getMostEligibleHostToRepair(repairType, currentRepairStatus, myId); + if (nodeToBeRepaired != null) { - return MY_TURN; + if (nodeToBeRepaired.hostId.equals(myId)) + { + logger.info("This node is selected to be repaired for repair type {}", repairType); + return MY_TURN; + } + + // log which node is next, which is helpful for debugging + logger.info("Next node to be repaired for repair type {}: {} ({})", repairType, + getBroadcastAddress(nodeToBeRepaired.hostId), + nodeToBeRepaired); + } + + // If this node is not identified as most eligible, set the repair lag time. + if (currentRepairStatus.myRepairHistory != null) + { + AutoRepairMetricsManager.getMetrics(repairType) + .recordRepairStartLag(currentRepairStatus.myRepairHistory.lastRepairFinishTime); } } else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) @@ -656,7 +975,6 @@ static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long ByteBufferUtil.bytes(repairType.toString()), ByteBufferUtil.bytes(myId) )), Dispatcher.RequestTime.forImmediateExecution()); - // Do not remove beblow log, the log is used by dtest logger.info("Auto repair finished for {}", myId); } @@ -756,7 +1074,7 @@ public static Set getPriorityHostIds(RepairType repairType) repairPriorityResult = UntypedResultSet.create(repairPriorityRows.result); Set priorities = null; - if (repairPriorityResult.size() > 0) + if (!repairPriorityResult.isEmpty()) { // there should be only one row UntypedResultSet.Row row = repairPriorityResult.one(); @@ -774,7 +1092,13 @@ public static Set getPriorityHosts(RepairType repairType) Set hosts = new HashSet<>(); for (UUID hostId : getPriorityHostIds(repairType)) { - hosts.add(ClusterMetadata.current().directory.addresses.get(NodeId.fromUUID(hostId)).broadcastAddress); + InetAddressAndPort broadcastAddress = getBroadcastAddress(hostId); + if (broadcastAddress == null) + { + logger.warn("Could not resolve broadcastAddress for {}, skipping 
considering it as a priority host", hostId); + continue; + } + hosts.add(broadcastAddress); } return hosts; } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 9f66a92abc95..4d34a53268f1 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -202,6 +202,18 @@ public void setParallelRepairCount(String repairType, int count) config.setParallelRepairCount(RepairType.parse(repairType), count); } + @Override + public void setAllowParallelReplicaRepair(String repairType, boolean enabled) + { + config.setAllowParallelReplicaRepair(RepairType.parse(repairType), enabled); + } + + @Override + public void setAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled) + { + config.setAllowParallelReplicaRepairAcrossSchedules(RepairType.parse(repairType), enabled); + } + @Override public void setMVRepairEnabled(String repairType, boolean enabled) { @@ -223,7 +235,7 @@ public Set getOnGoingRepairHostIds(String repairType) return Collections.emptySet(); } Set hostIds = new HashSet<>(); - AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(RepairType.parse(repairType))); + AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(RepairType.parse(repairType)), null); for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) { hostIds.add(id.toString()); @@ -273,21 +285,23 @@ private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig co appendConfig(sb, "priority_hosts", Joiner.on(',').skipNulls().join(priorityHosts)); } - appendConfig(sb , "min_repair_interval", config.getRepairMinInterval(repairType)); - appendConfig(sb , "repair_by_keyspace", config.getRepairByKeyspace(repairType)); - 
appendConfig(sb , "number_of_repair_threads", config.getRepairThreads(repairType)); - appendConfig(sb , "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); - appendConfig(sb , "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); - appendConfig(sb , "ignore_dcs", config.getIgnoreDCs(repairType)); - appendConfig(sb , "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); - appendConfig(sb , "parallel_repair_count", config.getParallelRepairCount(repairType)); - appendConfig(sb , "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); - appendConfig(sb , "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); - appendConfig(sb , "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); - appendConfig(sb , "repair_session_timeout", config.getRepairSessionTimeout(repairType)); - appendConfig(sb , "force_repair_new_node", config.getForceRepairNewNode(repairType)); - appendConfig(sb , "repair_max_retries", config.getRepairMaxRetries(repairType)); - appendConfig(sb , "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); + appendConfig(sb, "min_repair_interval", config.getRepairMinInterval(repairType)); + appendConfig(sb, "repair_by_keyspace", config.getRepairByKeyspace(repairType)); + appendConfig(sb, "number_of_repair_threads", config.getRepairThreads(repairType)); + appendConfig(sb, "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); + appendConfig(sb, "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); + appendConfig(sb, "ignore_dcs", config.getIgnoreDCs(repairType)); + appendConfig(sb, "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); + appendConfig(sb, "parallel_repair_count", config.getParallelRepairCount(repairType)); + appendConfig(sb, "parallel_repair_percentage", 
config.getParallelRepairPercentage(repairType)); + appendConfig(sb, "allow_parallel_replica_repair", config.getAllowParallelReplicaRepair(repairType)); + appendConfig(sb, "allow_parallel_replica_repair_across_schedules", config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + appendConfig(sb, "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); + appendConfig(sb, "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); + appendConfig(sb, "repair_session_timeout", config.getRepairSessionTimeout(repairType)); + appendConfig(sb, "force_repair_new_node", config.getForceRepairNewNode(repairType)); + appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries(repairType)); + appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); final String splitterClassName = splitterClass.class_name != null ? splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java index 72d9da7c5a0c..181c6008f533 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -53,6 +53,10 @@ public interface AutoRepairServiceMBean public void setParallelRepairCount(String repairType, int count); + public void setAllowParallelReplicaRepair(String repairType, boolean enabled); + + public void setAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled); + public void setMVRepairEnabled(String repairType, boolean enabled); public boolean isAutoRepairDisabled(); diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index bd9b67bc37b0..8ce1dd94bd4b 100644 --- 
a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -388,8 +388,12 @@ public RangesAtEndpoint getLocalReplicas(String keyspaceName) public RangesAtEndpoint getReplicas(String keyspaceName, InetAddressAndPort endpoint) { - return Keyspace.open(keyspaceName).getReplicationStrategy() - .getAddressReplicas(ClusterMetadata.current(), endpoint); + return getReplicas(Keyspace.open(keyspaceName).getReplicationStrategy(), endpoint); + } + + public RangesAtEndpoint getReplicas(AbstractReplicationStrategy replicationStrategy, InetAddressAndPort endpoint) + { + return replicationStrategy.getAddressReplicas(ClusterMetadata.current(), endpoint); } public List> getLocalRanges(String ks) diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index bf93bd179d4d..478fc0e55a00 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2640,6 +2640,16 @@ public void setAutoRepairParallelRepairCount(String repairType, int count) autoRepairProxy.setParallelRepairCount(repairType, count); } + public void setAutoRepairAllowParallelReplicaRepair(String repairType, boolean enabled) + { + autoRepairProxy.setAllowParallelReplicaRepair(repairType, enabled); + } + + public void setAutoRepairAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled) + { + autoRepairProxy.setAllowParallelReplicaRepairAcrossSchedules(repairType, enabled); + } + public void setAutoRepairPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) { autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java index 5a9a1411bb59..cc00cabd0633 100644 --- 
a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -47,7 +47,9 @@ public class SetAutoRepairConfig extends NodeToolCmd "[start_scheduler|number_of_repair_threads|min_repair_interval|sstable_upper_threshold" + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + - "|parallel_repair_count|parallel_repair_percentage|materialized_view_repair_enabled|repair_max_retries" + + "|parallel_repair_count|parallel_repair_percentage" + + "|allow_parallel_replica_repair|allow_parallel_repair_across_schedules" + + "|materialized_view_repair_enabled|repair_max_retries" + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration" + "|repair_by_keyspace|token_range_splitter.]", required = true) @@ -148,6 +150,12 @@ public void execute(NodeProbe probe) case "parallel_repair_percentage": probe.setAutoRepairParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); break; + case "allow_parallel_replica_repair": + probe.setAutoRepairAllowParallelReplicaRepair(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "allow_parallel_replica_repair_across_schedules": + probe.setAutoRepairAllowParallelReplicaRepairAcrossSchedules(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; case "materialized_view_repair_enabled": probe.setAutoRepairMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); break; diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java new file mode 100644 index 000000000000..a00e713cf298 --- /dev/null +++ 
b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.repair; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.Util; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; + +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertEquals; + +/** + * Distributed tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} scheduler's + * allow_parallel_replica_repair_across_schedules feature. 
+ */ +public class AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest extends TestBaseImpl +{ + private static Cluster cluster; + + @BeforeClass + public static void init() throws IOException + { + // Configure a cluster with preview and incremental repair enabled in a way that preview repair can be + // run on all three nodes concurrently, but incremental repair can only be run when there are no parallel + // repairs. We should detect contention in the incremental repair scheduler but not preview repaired + // scheduler as a result. + cluster = Cluster.build(3) + .withConfig(config -> config + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.PREVIEW_REPAIRED.getConfigName(), + ImmutableMap.of( + // Configure preview repair to run frequently to + // provoke contention with incremental scheduler. + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "3", + "allow_parallel_replica_repair", "true", + "min_repair_interval", "5s"), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "3", + // Don't allow parallel replica repair across + // schedules + "allow_parallel_replica_repair", "false", + "allow_parallel_replica_repair_across_schedules", "false", + "min_repair_interval", "5s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_retry_backoff", "5s") + .set("auto_repair.repair_task_min_duration", "0s") + .set("auto_repair.repair_check_interval", "5s")) + .start(); + + cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); + } + + @AfterClass + public static void tearDown() + { + 
cluster.close(); + } + + @Test + public void testScheduler() + { + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + AutoRepairService.setup(); + AutoRepair.instance.setup(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); + + // validate that the repair ran on all nodes + cluster.forEach(i -> i.runOnInstance(() -> { + // Expect contention on incremental repair across schedules + AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", + greaterThan(0L), + incrementalMetrics.repairDelayedBySchedule::getCount, + 5, + TimeUnit.MINUTES); + + // No repair contention should be observed for preview repaired since allow_parallel_replica_repair was true + AutoRepairMetrics previewMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.PREVIEW_REPAIRED); + assertEquals(0L, previewMetrics.repairDelayedByReplica.getCount()); + assertEquals(0L, previewMetrics.repairDelayedBySchedule.getCount()); + })); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index c7b7448d1a24..e726dc1a17f7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -27,9 +27,13 @@ import com.google.common.collect.ImmutableMap; import org.apache.cassandra.Util; -import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; import org.apache.cassandra.schema.SystemDistributedKeyspace; 
+import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -46,7 +50,7 @@ import static org.junit.Assert.assertEquals; /** - * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + * Distributed tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} scheduler */ public class AutoRepairSchedulerTest extends TestBaseImpl { @@ -61,33 +65,57 @@ public static void init() throws IOException // Create SimpleDateFormat object with the given pattern sdf = new SimpleDateFormat(pattern); sdf.setLenient(false); - cluster = Cluster.build(3).withConfig(config -> config - .set("auto_repair", - ImmutableMap.of( - "repair_type_overrides", - ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s"), - AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), - ImmutableMap.of( - "initial_scheduler_delay", "5s", - "enabled", "true", - "parallel_repair_count", "1", - "parallel_repair_percentage", "0", - "min_repair_interval", "1s")))) - .set("auto_repair.enabled", "true") - .set("auto_repair.global_settings.repair_by_keyspace", "true") - .set("auto_repair.repair_task_min_duration", "0s") - .set("auto_repair.repair_check_interval", "10s")).start(); + // Configure a 3-node cluster with num_tokens: 4 and auto_repair enabled + cluster = Cluster.build(3) + .withTokenCount(4) + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(3, 4)) + .withConfig(config -> config + .set("num_tokens", 4) + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "2", + // Allow parallel replica repair to allow replicas + // to execute full repair at same time. 
+ "allow_parallel_replica_repair", "true", + "min_repair_interval", "15s"), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + // Set parallel repair count to 3 to provoke + // contention between replicas when scheduling. + "parallel_repair_count", "3", + // Disallow parallel replica repair to prevent + // replicas from issuing incremental repair at + // same time. + "allow_parallel_replica_repair", "false", + // Run more aggressively since full repair is + // less restrictive about when it can run repair, + // so need to check more frequently to allow + // incremental to get an attempt in. + "min_repair_interval", "5s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_by_keyspace", "true") + .set("auto_repair.global_settings.repair_retry_backoff", "5s") + .set("auto_repair.repair_task_min_duration", "0s") + .set("auto_repair.repair_check_interval", "5s")) + .start(); cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); } + @AfterClass + public static void tearDown() + { + cluster.close(); + } + @Test public void testScheduler() throws ParseException { @@ -98,10 +126,7 @@ public void testScheduler() throws ParseException cluster.forEach(i -> i.runOnInstance(() -> { try { - DatabaseDescriptor.setCDCOnRepairEnabled(false); - DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); - AutoRepairService.instance.setup(); - DatabaseDescriptor.setCDCOnRepairEnabled(false); + AutoRepairService.setup(); AutoRepair.instance.setup(); } catch (Exception e) @@ -112,17 +137,39 @@ public void testScheduler() throws ParseException // validate that the repair ran on all nodes cluster.forEach(i -> i.runOnInstance(() -> { - 
Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + // Reduce sleeping if repair finishes quickly to speed up test but make it non-zero to provoke some + // contention. + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("1s"); + + AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.FULL)::getLastRepairTime, + () -> incrementalMetrics.nodeRepairTimeInSec.getValue().longValue(), 5, TimeUnit.MINUTES); - Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + + // Expect some contention on incremental repair. + Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", greaterThan(0L), - AutoRepair.instance.repairStates.get(AutoRepairConfig.RepairType.INCREMENTAL)::getLastRepairTime, + incrementalMetrics.repairDelayedByReplica::getCount, 5, TimeUnit.MINUTES); + // Do not expect any contention across schedules since allow_parallel_replica_repairs across schedules + // was not configured. 
+ assertEquals(0L, incrementalMetrics.repairDelayedBySchedule.getCount()); + + AutoRepairMetrics fullMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.FULL); + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + greaterThan(0L), + () -> fullMetrics.nodeRepairTimeInSec.getValue().longValue(), + 5, + TimeUnit.MINUTES); + + // No repair contention should be observed for full repair since allow_parallel_replica_repair was true + assertEquals(0L, fullMetrics.repairDelayedByReplica.getCount()); + assertEquals(0L, fullMetrics.repairDelayedBySchedule.getCount()); })); + validate(AutoRepairConfig.RepairType.FULL.toString()); validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); } @@ -137,7 +184,7 @@ private void validate(String repairType) throws ParseException // repair_type Assert.assertEquals(repairType, row[0].toString()); // host_id - UUID.fromString(row[1].toString()); + Assert.assertNotNull(UUID.fromString(row[1].toString())); // ensure there is a legit repair_start_ts and repair_finish_ts sdf.parse(row[2].toString()); sdf.parse(row[3].toString()); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 2c3e4401ed9f..0cf51ca40884 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -20,7 +20,6 @@ import java.util.HashSet; import java.util.Map; -import java.util.Objects; import java.util.Collections; import java.util.Set; @@ -155,7 +154,7 @@ public void testSetRepairThreads() { config.setRepairThreads(repairType, 5); - assert config.getOptions(repairType).number_of_repair_threads == 5; + assertEquals(5, config.getOptions(repairType).number_of_repair_threads.intValue()); } @Test @@ -183,7 +182,7 @@ public void testSetRepairMinFrequencyInHours() { 
config.setRepairMinInterval(repairType, "5s"); - assert config.getOptions(repairType).min_repair_interval.toSeconds() == 5; + assertEquals(5, config.getOptions(repairType).min_repair_interval.toSeconds()); } @Test @@ -201,7 +200,7 @@ public void testSetAutoRepairHistoryClearDeleteHostsBufferInSec() { config.setAutoRepairHistoryClearDeleteHostsBufferInterval("5s"); - assert Objects.equals(config.history_clear_delete_hosts_buffer_interval, new DurationSpec.IntSecondsBound("5s")); + assertEquals(new DurationSpec.IntSecondsBound("5s"), config.history_clear_delete_hosts_buffer_interval); } @Test @@ -219,7 +218,7 @@ public void testSetRepairSSTableCountHigherThreshold() { config.setRepairSSTableCountHigherThreshold(repairType, 5); - assert config.getOptions(repairType).sstable_upper_threshold == 5; + assertEquals(5, config.getOptions(repairType).sstable_upper_threshold.intValue()); } @Test @@ -237,8 +236,8 @@ public void testSetAutoRepairTableMaxRepairTimeInSec() { config.setAutoRepairTableMaxRepairTime(repairType, "5s"); - assert config.getOptions(repairType).table_max_repair_time.toSeconds() == 5; - } + assertEquals(5, config.getOptions(repairType).table_max_repair_time.toSeconds()); +} @Test public void testGetIgnoreDCs() @@ -291,7 +290,7 @@ public void testSetParallelRepairPercentageInGroup() { config.setParallelRepairPercentage(repairType, 5); - assert config.getOptions(repairType).parallel_repair_percentage == 5; + assertEquals(5, config.getOptions(repairType).parallel_repair_percentage.intValue()); } @Test @@ -309,7 +308,55 @@ public void testSetParallelRepairCountInGroup() { config.setParallelRepairCount(repairType, 5); - assert config.getOptions(repairType).parallel_repair_count == 5; + assertEquals(5, config.getOptions(repairType).parallel_repair_count.intValue()); + } + + @Test + public void testGetAllowParallelReplicaRepair() + { + // should default to false + assertFalse(config.global_settings.allow_parallel_replica_repair); + 
assertFalse(config.getAllowParallelReplicaRepair(repairType)); + + // setting global to true should also cause repair type config to inherit. + config.global_settings.allow_parallel_replica_repair = true; + assertTrue(config.getAllowParallelReplicaRepair(repairType)); + + } + + @Test + public void testSetAllowParallelReplicaRepair() + { + // should default to false + assertFalse(config.getAllowParallelReplicaRepair(repairType)); + + // setting explicitly for repair type should update it + config.setAllowParallelReplicaRepair(repairType, true); + assertTrue(config.getAllowParallelReplicaRepair(repairType)); + } + + @Test + public void testGetAllowParallelReplicaRepairAcrossSchedules() + { + // should default to true + assertTrue(config.global_settings.allow_parallel_replica_repair_across_schedules); + assertTrue(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + // setting global to true should also cause repair type config to inherit. + config.global_settings.allow_parallel_replica_repair_across_schedules = false; + assertFalse(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + } + + @Test + public void testSetAllowParallelReplicaRepairAcrossSchedules() + { + // should default to true + assertTrue(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + // setting explicitly for repair type should update it + config.setAllowParallelReplicaRepairAcrossSchedules(repairType, false); + assertFalse(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); } @Test @@ -396,7 +443,7 @@ public void testSetInitialSchedulerDelay() { config.setInitialSchedulerDelay(repairType, "5s"); - assert config.getOptions(repairType).initial_scheduler_delay.toSeconds() == 5; + assertEquals(5, config.getOptions(repairType).initial_scheduler_delay.toSeconds()); } @Test @@ -414,7 +461,7 @@ public void testSetRepairSessionTimeout() { config.setRepairSessionTimeout(repairType, "1h"); - assert 
config.getOptions(repairType).repair_session_timeout.toSeconds() == 3600; + assertEquals(3600, config.getOptions(repairType).repair_session_timeout.toSeconds()); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java new file mode 100644 index 000000000000..d0b053a58778 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class AutoRepairMetricsTest extends CQLTester +{ + + private AutoRepairMetrics metrics; + + @BeforeClass + public static void setupClass() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); + + // Set min repair interval to an hour. + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(RepairType.FULL, "1h"); + } + + @Before + public void setup() + { + metrics = AutoRepairMetricsManager.getMetrics(RepairType.FULL); + } + + @Test + public void testShouldRecordRepairStartLagAndResetOnMyTurn() + { + // record a last finish repair time of one day. + long oneDayAgo = AutoRepair.instance.currentTimeMs() - 86_400_000; + metrics.recordRepairStartLag(oneDayAgo); + + // expect a recorded lag time of approximately 1 day (last repair finish time) - 1 hour (min repair interval) + long expectedLag = 86400 - 3600; + long recordedLag = metrics.repairStartLagSec.getValue(); + assertTrue(String.format("Expected at last 23h of lag (%d) but got (%d)", expectedLag, recordedLag), + recordedLag >= expectedLag); + // Given timing, allow at most 5 seconds of skew. 
+ assertTrue(String.format("Expected 23h of lag (%d) but got a larger value (%d)", expectedLag, recordedLag), + recordedLag <= expectedLag + 5); + + // expect lag time to be restarted when recording a turn. + metrics.recordTurn(RepairTurn.MY_TURN); + assertEquals(0, metrics.repairStartLagSec.getValue().intValue()); + } + + @Test + public void testShouldRecordRepairStartLagOfZeroWhenFinishTimeIsWithinMinRepairInterval() + { + // record a last finish repair time of one 30 minutes + long thirtyMinutesAgo = AutoRepair.instance.currentTimeMs() - 1_800_000; + metrics.recordRepairStartLag(thirtyMinutesAgo); + + // expect 0 lag because last repair finish time was less than min repair interval + assertEquals(0, metrics.repairStartLagSec.getValue().intValue()); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 1cc80791b4e6..73efae40130e 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -276,6 +276,8 @@ public void testRepair() //if repair was done then lastRepairTime should be non-zero Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual value lastRepairTime %d", lastRepairTime), lastRepairTime > 0); + // repair start lag sec should be reset on a successful repair + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).repairStartLagSec.getValue().intValue()); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index d79f0cb6b4c8..b800b213bbb6 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -196,7 +196,7 @@ public void 
testGetCurrentRepairStatus() SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, repairType.toString(), regularRepair)); - CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType); + CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType, AutoRepairUtils.getAutoRepairHistory(repairType), hostId); assertNotNull(status); assertEquals(1, status.historiesWithoutOnGoingRepair.size()); @@ -207,6 +207,7 @@ public void testGetCurrentRepairStatus() assertTrue(status.hostIdsWithOnGoingForceRepair.contains(forceRepair)); assertEquals(1, status.priority.size()); assertTrue(status.priority.contains(regularRepair)); + assertEquals(hostId, status.myRepairHistory.hostId); } @Test From f7c862d8c865bb70d23fda0f30f1004fadc80582 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 30 Mar 2025 13:25:02 -0700 Subject: [PATCH 246/257] Add toString to AutoRepairConfig.java --- .../repair/autorepair/AutoRepairConfig.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 739a57e9c746..b8d6d105fc2a 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -594,4 +594,16 @@ protected T applyOverrides(RepairType repairType, Function optio // Otherwise check defaults return getOverride(Options.getDefaultOptionsMap().get(repairType), optionSupplier); } + + public String toString() + { + return "AutoRepairConfig{" + + "enabled=" + enabled + + ", repair_check_interval=" + repair_check_interval + + ", history_clear_delete_hosts_buffer_interval=" + history_clear_delete_hosts_buffer_interval + + ", repair_task_min_duration=" + repair_task_min_duration + + ", global_settings=" + global_settings + + ", repair_type_overrides=" + repair_type_overrides + 
+ "}"; + } } From d6b7a467296b301cad7b6fd7c2154643f580700e Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 6 Apr 2025 14:15:36 -0700 Subject: [PATCH 247/257] Create AutoRepair executors only if enabled and a few other nits --- conf/cassandra.yaml | 2 +- conf/cassandra_latest.yaml | 2 +- .../pages/managing/operating/auto_repair.adoc | 2 +- .../repair/autorepair/AutoRepair.java | 45 ++++++++----------- .../repair/autorepair/AutoRepairConfig.java | 12 +---- .../cassandra/service/AutoRepairService.java | 2 +- .../cassandra/service/StorageService.java | 2 +- .../AutoRepairParameterizedTest.java | 22 +-------- .../repair/autorepair/AutoRepairTest.java | 40 +++++++---------- .../FixedSplitTokenRangeSplitterHelper.java | 8 ++-- 10 files changed, 48 insertions(+), 89 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 00b5e6e38ad1..7dbf1b3fabe2 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2800,7 +2800,7 @@ storage_compatibility_mode: NONE # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. # initial_scheduler_delay: 5m -# # Timeout for resuming stuck repair sessions. +# # Timeout for retrying stuck repair sessions. # repair_session_timeout: 3h # # Force immediate repair on new nodes after they join the ring. # force_repair_new_node: false diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index befaa7ece68e..6dfc89a975b9 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2493,7 +2493,7 @@ storage_compatibility_mode: NONE # materialized_view_repair_enabled: false # # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. # initial_scheduler_delay: 5m -# # Timeout for resuming stuck repair sessions. +# # Timeout for retrying stuck repair sessions. 
# repair_session_timeout: 3h # # Force immediate repair on new nodes after they join the ring. # force_repair_new_node: false diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index 06c0db456753..3928d0afccef 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -205,7 +205,7 @@ node. Defaults to true and is only evaluated when allow_parallel_replica_repair | materialized_view_repair_enabled | false | Repairs materialized views if true. | initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. -| repair_session_timeout | 3h | Timeout for resuming stuck repair sessions. +| repair_session_timeout | 3h | Timeout for retrying stuck repair sessions. | force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. | sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. | table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. 
If exceeded, the repair diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 51704ce2647b..8c08ce7c80cb 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -40,7 +40,6 @@ import org.apache.cassandra.repair.RepairCoordinator; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.config.DurationSpec; -import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.Clock; import org.slf4j.Logger; @@ -84,12 +83,12 @@ public class AutoRepair public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); @VisibleForTesting - public final Map repairStates; + public Map repairStates; @VisibleForTesting - protected final Map repairExecutors; + protected Map repairExecutors; - protected final Map repairRunnableExecutors; + protected Map repairRunnableExecutors; @VisibleForTesting // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair @@ -99,21 +98,13 @@ public class AutoRepair @VisibleForTesting protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; - private boolean isSetupDone = false; + @VisibleForTesting + public boolean isSetupDone = false; public static AutoRepair instance = new AutoRepair(); - @VisibleForTesting - protected AutoRepair() + private AutoRepair() { - repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); - repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); - repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); - for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) - { - repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), 
Thread.NORM_PRIORITY)); - repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); - repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); - } + // Private constructor to prevent instantiation } public void setup() @@ -126,6 +117,16 @@ public void setup() { return; } + repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); + } + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); AutoRepairUtils.setup(); @@ -144,16 +145,6 @@ public void setup() } } - // repairAsync runs a repair session of the given type asynchronously. - public void repairAsync(AutoRepairConfig.RepairType repairType) - { - if (!AutoRepairService.instance.getAutoRepairConfig().isAutoRepairEnabled(repairType)) - { - throw new ConfigurationException("Auto-repair is disabled for repair type " + repairType); - } - repairExecutors.get(repairType).submit(() -> repair(repairType)); - } - /** * @return The current observed system time in ms. 
*/ @@ -554,7 +545,7 @@ public boolean isSuccess() public void progress(String tag, ProgressEvent event) { ProgressEventType type = event.getType(); - String message = String.format("[%s] %s", format.format(Clock.Global.currentTimeMillis()), event.getMessage()); + String message = String.format("[%s] %s", format.format(timeFunc.get()), event.getMessage()); if (type == ProgressEventType.ERROR) { logger.error("Repair failure for repair {}: {}", repairType.toString(), message); diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index b8d6d105fc2a..9d842888d585 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -320,16 +320,6 @@ public ParameterizedClass getTokenRangeSplitter(RepairType repairType) return applyOverrides(repairType, opt -> opt.token_range_splitter); } - /** - * Set a new token range splitter, this is not meant to be used other than for testing. - */ - @VisibleForTesting - void setTokenRangeSplitter(RepairType repairType, ParameterizedClass tokenRangeSplitter) - { - getOptions(repairType).token_range_splitter = tokenRangeSplitter; - tokenRangeSplitters.remove(repairType); - } - public IAutoRepairTokenRangeSplitter getTokenRangeSplitterInstance(RepairType repairType) { return tokenRangeSplitters.computeIfAbsent(repairType, @@ -530,7 +520,7 @@ protected static Options getDefaultOptions() public volatile ParameterizedClass token_range_splitter; // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; - // Timeout for resuming stuck repair sessions. + // Timeout for retrying stuck repair sessions. 
public volatile DurationSpec.IntSecondsBound repair_session_timeout; // Maximum number of retries for a repair session. public volatile Integer repair_max_retries = 3; diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java index 4d34a53268f1..db1b29c38968 100644 --- a/src/java/org/apache/cassandra/service/AutoRepairService.java +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -71,7 +71,7 @@ public void checkCanRun(String repairType) public void checkCanRun(RepairType repairType) { if (!config.isAutoRepairSchedulingEnabled()) - throw new ConfigurationException("Auto-repair scheduller is disabled."); + throw new ConfigurationException("Auto-repair scheduler is disabled."); if (repairType != RepairType.INCREMENTAL) return; diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 8ce1dd94bd4b..5d22ba4c058a 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1143,8 +1143,8 @@ public void doAutoRepairSetup() { logger.info("Enabling auto-repair scheduling"); AutoRepair.instance.setup(); + logger.info("AutoRepair setup complete!"); } - logger.info("AutoRepair setup complete!"); } diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 73efae40130e..8c3127ee3097 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -180,8 +180,8 @@ public void setup() Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); 
Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); - - AutoRepair.instance = new AutoRepair(); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); executeCQL(); timeFuncCalls = 0; @@ -240,24 +240,6 @@ private void executeCQL() .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); } - @Test(expected = ConfigurationException.class) - public void testRepairAsyncWithRepairTypeDisabled() - { - AutoRepairService.instance.getAutoRepairConfig().setAutoRepairEnabled(repairType, false); - - AutoRepair.instance.repairAsync(repairType); - } - - @Test - public void testRepairAsync() - { - AutoRepair.instance.repairExecutors.put(repairType, mockExecutor); - - AutoRepair.instance.repairAsync(repairType); - - verify(mockExecutor, Mockito.times(1)).submit(Mockito.any(Runnable.class)); - } - @Test public void testRepairTurn() { diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java index caae31c69db1..1eceb386ee25 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -68,14 +68,12 @@ public void setup() @Test public void testSetup() { - AutoRepair instance = new AutoRepair(); - instance.setup(); - - assertEquals(RepairType.values().length, instance.repairExecutors.size()); - for (RepairType repairType : instance.repairExecutors.keySet()) + AutoRepair.instance.setup(); + assertEquals(RepairType.values().length, AutoRepair.instance.repairExecutors.size()); + for (RepairType repairType : AutoRepair.instance.repairExecutors.keySet()) { - int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() - + instance.repairExecutors.get(repairType).getActiveTaskCount(); + int expectedTasks = 
AutoRepair.instance.repairExecutors.get(repairType).getPendingTaskCount() + + AutoRepair.instance.repairExecutors.get(repairType).getActiveTaskCount(); assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), expectedTasks > 0); } @@ -84,18 +82,16 @@ public void testSetup() @Test public void testSafeGuardSetupCall() { - AutoRepair instance = new AutoRepair(); - // only one should be setup, and rest should be ignored - instance.setup(); - instance.setup(); - instance.setup(); + AutoRepair.instance.setup(); + AutoRepair.instance.setup(); + AutoRepair.instance.setup(); - assertEquals(RepairType.values().length, instance.repairExecutors.size()); - for (RepairType repairType : instance.repairExecutors.keySet()) + assertEquals(RepairType.values().length, AutoRepair.instance.repairExecutors.size()); + for (RepairType repairType : AutoRepair.instance.repairExecutors.keySet()) { - int expectedTasks = instance.repairExecutors.get(repairType).getPendingTaskCount() - + instance.repairExecutors.get(repairType).getActiveTaskCount(); + int expectedTasks = AutoRepair.instance.repairExecutors.get(repairType).getPendingTaskCount() + + AutoRepair.instance.repairExecutors.get(repairType).getActiveTaskCount(); assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), expectedTasks > 0); } @@ -108,9 +104,8 @@ public void testSetupFailsWhenIREnabledWithCDCReplay() DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); DatabaseDescriptor.setCDCEnabled(true); DatabaseDescriptor.setCDCOnRepairEnabled(true); - - AutoRepair instance = new AutoRepair(); - instance.setup(); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); } @Test @@ -120,8 +115,7 @@ public void testNoFailureIfMVRepairOnButConfigIsOff() DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, false); DatabaseDescriptor.setCDCOnRepairEnabled(false); 
DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - AutoRepair instance = new AutoRepair(); - instance.setup(); + AutoRepair.instance.setup(); } @Test(expected = ConfigurationException.class) @@ -131,8 +125,8 @@ public void testSetupFailsWhenIREnabledWithMVReplay() DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, true); DatabaseDescriptor.setCDCOnRepairEnabled(false); DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); - AutoRepair instance = new AutoRepair(); - instance.setup(); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); } @Test diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java index cfa0748f1f7f..dac4a167d556 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -37,6 +37,8 @@ import org.apache.cassandra.service.AutoRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.compatibility.TokenRingUtils; +import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; @@ -78,7 +80,7 @@ public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRan { int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); assertEquals(numTokens, 
tokens.size()); List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); List> expectedToken = new ArrayList<>(); @@ -123,7 +125,7 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSub { int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); assertEquals(numTokens, tokens.size()); List> expectedToken = new ArrayList<>(); for (Range range : tokens) @@ -153,7 +155,7 @@ public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSub public static void testTokenRangesWithDefaultSplit(int numTokens, AutoRepairConfig.RepairType repairType) { int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); - Collection> tokens = StorageService.instance.getPrimaryRanges(KEYSPACE); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); assertEquals(numTokens, tokens.size()); List> expectedToken = new ArrayList<>(); for (Range range : tokens) From 729b0d34e7838402ee0d46ebb93465dfaaf0e5d0 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Sun, 6 Apr 2025 20:12:54 -0700 Subject: [PATCH 248/257] parens in next line --- .../autorepair/AutoRepairStateTest.java | 8 ++- .../service/ActiveRepairServiceTest.java | 63 ++++++++++--------- .../nodetool/SSTableRepairedSetTest.java | 29 ++++++--- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java index fc4ac2f03c79..422ebdff5192 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java +++ 
b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -185,7 +185,9 @@ public void testGetLongestUnrepairedSecNull() try { assertEquals(0, state.getLongestUnrepairedSec()); - } catch (Exception e) { + } + catch (Exception e) + { assertNull(e); } } @@ -201,7 +203,9 @@ public void testGetLongestUnrepairedSec() try { assertEquals(1, state.getLongestUnrepairedSec()); - } catch (Exception e) { + } + catch (Exception e) + { assertNull(e); } } diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index 6ccfbcbfa971..455f5194444a 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -1,21 +1,21 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.cassandra.service; import java.net.UnknownHostException; @@ -40,6 +40,7 @@ import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.Condition; + import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -229,12 +230,12 @@ public void testGetNeighborsTimesTwoInSpecifiedHosts() throws Throwable } expected.remove(FBUtilities.getBroadcastAddressAndPort()); - Collection hosts = Arrays.asList(FBUtilities.getBroadcastAddressAndPort().getHostAddressAndPort(),expected.get(0).getHostAddressAndPort()); + Collection hosts = Arrays.asList(FBUtilities.getBroadcastAddressAndPort().getHostAddressAndPort(), expected.get(0).getHostAddressAndPort()); Iterable> ranges = StorageService.instance.getLocalReplicas(KEYSPACE5).ranges(); assertEquals(expected.get(0), ActiveRepairService.instance().getNeighbors(KEYSPACE5, ranges, - ranges.iterator().next(), - null, hosts).endpoints().iterator().next()); + ranges.iterator().next(), + null, hosts).endpoints().iterator().next()); } @Test(expected = IllegalArgumentException.class) @@ -264,7 +265,6 @@ public void testParentRepairStatus() throws Throwable List failed = StorageService.instance.getParentRepairStatus(3); assertNotNull(failed); assertEquals(ActiveRepairService.ParentRepairStatus.FAILED, 
ActiveRepairService.ParentRepairStatus.valueOf(failed.get(0))); - } Set addTokens(int max) throws Throwable @@ -339,10 +339,10 @@ private static RepairOption opts(String... params) { assert params.length % 2 == 0 : "unbalanced key value pairs"; Map opt = new HashMap<>(); - for (int i=0; i<(params.length >> 1); i++) + for (int i = 0; i < (params.length >> 1); i++) { int idx = i << 1; - opt.put(params[idx], params[idx+1]); + opt.put(params[idx], params[idx + 1]); } return RepairOption.parse(opt, DatabaseDescriptor.getPartitioner()); } @@ -362,19 +362,19 @@ public void repairedAt() throws Exception Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true)), false)); // subrange incremental repair Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - RANGES_KEY, "1:2"), false)); + RANGES_KEY, "1:2"), false)); // hosts incremental repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - HOSTS_KEY, "127.0.0.1"), false)); + HOSTS_KEY, "127.0.0.1"), false)); // dc incremental repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - DATACENTERS_KEY, "DC2"), false)); + DATACENTERS_KEY, "DC2"), false)); // forced incremental repair Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - FORCE_REPAIR_KEY, b2s(true)), false)); + FORCE_REPAIR_KEY, b2s(true)), false)); Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - FORCE_REPAIR_KEY, b2s(true)), true)); + FORCE_REPAIR_KEY, b2s(true)), true)); // full repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(false)), false)); @@ -420,7 +420,8 @@ public void 
testRejectWhenPoolFullStrategy() throws InterruptedException // Submission is unblocked Thread.sleep(250); - validationExecutor.submit(() -> {}); + validationExecutor.submit(() -> { + }); } finally { @@ -457,8 +458,8 @@ public void testQueueWhenPoolFullStrategy() throws InterruptedException allSubmitted.await(TASK_SECONDS + 1, TimeUnit.SECONDS); // Give the tasks we expect to execute immediately chance to be scheduled - Util.spinAssertEquals(2 , ((ExecutorPlus) validationExecutor)::getActiveTaskCount, 1); - Util.spinAssertEquals(3 , ((ExecutorPlus) validationExecutor)::getPendingTaskCount, 1); + Util.spinAssertEquals(2, ((ExecutorPlus) validationExecutor)::getActiveTaskCount, 1); + Util.spinAssertEquals(3, ((ExecutorPlus) validationExecutor)::getPendingTaskCount, 1); // verify that we've reached a steady state with 2 threads actively processing and 3 queued tasks Assert.assertEquals(2, ((ExecutorPlus) validationExecutor).getActiveTaskCount()); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java index edd57447ad13..5d23d22253ad 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -50,15 +50,23 @@ public class SSTableRepairedSetTest private SSTableRepairedSet cmd; @Before - public void setUp() { + public void setUp() + { MockitoAnnotations.initMocks(this); - PrintStream noopStream = new PrintStream(new OutputStream() {@Override public void write(int b) {}}); + PrintStream noopStream = new PrintStream(new OutputStream() + { + @Override + public void write(int b) + { + } + }); when(probe.output()).thenReturn(new Output(noopStream, noopStream)); cmd = new SSTableRepairedSet(); } @Test - public void testNoKeyspace() { + public void testNoKeyspace() + { when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", 
"ks2"))); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); when(probe.getAutoRepairTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); @@ -73,7 +81,8 @@ public void testNoKeyspace() { } @Test - public void testBothRepairedAndUnrepaired() { + public void testBothRepairedAndUnrepaired() + { cmd.args = Arrays.asList("keyspace"); cmd.isRepaired = true; cmd.isUnrepaired = true; @@ -82,14 +91,16 @@ public void testBothRepairedAndUnrepaired() { } @Test - public void testNeitherRepairedNorUnrepaired() { + public void testNeitherRepairedNorUnrepaired() + { cmd.args = Arrays.asList("keyspace"); cmd.execute(probe); verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); } @Test - public void testRepairedPreview() { + public void testRepairedPreview() + { cmd.args = Arrays.asList("keyspace"); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isRepaired = true; @@ -98,7 +109,8 @@ public void testRepairedPreview() { } @Test - public void testUnrepairedReallySet() { + public void testUnrepairedReallySet() + { cmd.args = Arrays.asList("keyspace"); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isUnrepaired = true; @@ -108,7 +120,8 @@ public void testUnrepairedReallySet() { } @Test - public void testExecuteWithTableNames() { + public void testExecuteWithTableNames() + { cmd.args = Arrays.asList("keyspace", "table1", "table2"); when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); cmd.isRepaired = true; From beb8b61951a8f07644da3aeab78be5bf3f858143 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 18 Apr 2025 10:45:03 -0700 Subject: [PATCH 249/257] Adjust based on the latest trunk --- .../apache/cassandra/config/DurationSpec.java | 1 - .../apache/cassandra/repair/RepairJob.java | 6 ----- .../cassandra/repair/ValidationManager.java | 2 +- 
.../repair/autorepair/AutoRepairState.java | 6 ++--- .../autorepair/RepairTokenRangeSplitter.java | 3 ++- .../cassandra/schema/SchemaKeyspace.java | 3 +-- .../apache/cassandra/schema/TableParams.java | 2 -- .../service/ActiveRepairService.java | 27 ++++++++----------- .../cassandra/service/StorageService.java | 1 - .../org/apache/cassandra/tools/NodeTool.java | 2 +- .../apache/cassandra/utils/FBUtilities.java | 1 - test/unit/org/apache/cassandra/Util.java | 9 +------ .../statements/DescribeStatementTest.java | 1 + .../AutoRepairParameterizedTest.java | 3 +-- .../AutoRepairTablePropertyTest.java | 3 ++- .../autorepair/AutoRepairUtilsTest.java | 2 -- .../RepairTokenRangeSplitterTest.java | 2 +- .../service/ActiveRepairServiceTest.java | 7 +++-- .../service/StorageServiceServerTest.java | 1 + 19 files changed, 29 insertions(+), 53 deletions(-) diff --git a/src/java/org/apache/cassandra/config/DurationSpec.java b/src/java/org/apache/cassandra/config/DurationSpec.java index 969ba255ea6d..01a7d70b5f46 100644 --- a/src/java/org/apache/cassandra/config/DurationSpec.java +++ b/src/java/org/apache/cassandra/config/DurationSpec.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.config; -import java.io.Serializable; import java.time.Duration; import java.util.Arrays; import java.util.Objects; diff --git a/src/java/org/apache/cassandra/repair/RepairJob.java b/src/java/org/apache/cassandra/repair/RepairJob.java index 583e0e26991c..f20fe189f22f 100644 --- a/src/java/org/apache/cassandra/repair/RepairJob.java +++ b/src/java/org/apache/cassandra/repair/RepairJob.java @@ -36,12 +36,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.*; - -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.repair.state.JobState; -import 
org.apache.cassandra.utils.concurrent.AsyncFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/java/org/apache/cassandra/repair/ValidationManager.java b/src/java/org/apache/cassandra/repair/ValidationManager.java index c028102af1a9..f4229751984c 100644 --- a/src/java/org/apache/cassandra/repair/ValidationManager.java +++ b/src/java/org/apache/cassandra/repair/ValidationManager.java @@ -37,8 +37,8 @@ import org.apache.cassandra.metrics.TableMetrics; import org.apache.cassandra.metrics.TopPartitionTracker; import org.apache.cassandra.repair.state.ValidationState; -import org.apache.cassandra.utils.Clock; import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTree; import org.apache.cassandra.utils.MerkleTrees; diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java index 7266a28d1cc7..6822f20cf023 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -252,7 +252,7 @@ public PreviewRepairedState() public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, - AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.REPAIRED, false, true, false, false, false, false); + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.REPAIRED, false, true, true, false, false, false); option.getColumnFamilies().addAll(tables); @@ -272,7 +272,7 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, { RepairOption option = new 
RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, true, false, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, - false, false, PreviewKind.NONE, true, true, false, false, false, false); + false, false, PreviewKind.NONE, true, true, true, false, false, false); option.getColumnFamilies().addAll(filterOutUnsafeTables(keyspace, tables)); @@ -317,7 +317,7 @@ public RepairCoordinator getRepairRunnable(String keyspace, List tables, { RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, - false, false, PreviewKind.NONE, true, true, false, false, false, false); + false, false, PreviewKind.NONE, true, true, true, false, false, false); option.getColumnFamilies().addAll(tables); diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index a743df946e36..6f522b4197d4 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -37,6 +37,7 @@ import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.utils.FBUtilities; @@ -671,7 +672,7 @@ static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repa while (refs == null) { Iterable sstables = cfs.getTracker().getView().select(SSTableSet.CANONICAL); - SSTableIntervalTree tree = SSTableIntervalTree.build(sstables); + SSTableIntervalTree tree = SSTableIntervalTree.buildSSTableIntervalTree(ImmutableList.copyOf(sstables)); Range r = Range.makeRowRange(tokenRange); List canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); if (repairType == 
AutoRepairConfig.RepairType.INCREMENTAL) diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index fe98cfa0ec1a..9001480c2494 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -45,7 +45,6 @@ import org.apache.cassandra.db.rows.*; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.schema.ColumnMetadata.ClusteringOrder; import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; @@ -577,7 +576,7 @@ private static void addTableToSchemaMutation(TableMetadata table, boolean withCo } } - private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBuilder builder, boolean forView) + public static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBuilder builder, boolean forView) { builder.add("bloom_filter_fp_chance", params.bloomFilterFpChance) .add("comment", params.comment) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 2fe01b78470a..e44af5ca5613 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -37,8 +37,6 @@ import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; -import org.apache.cassandra.tcm.serialization.MetadataSerializer; -import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; import 
org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 22d78794bcee..bee41a7fa3a3 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -50,28 +50,13 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.Config; -import org.apache.cassandra.config.DurationSpec; -import org.apache.cassandra.repair.Scheduler; -import org.apache.cassandra.locator.EndpointsByRange; -import org.apache.cassandra.locator.EndpointsForRange; -import org.apache.cassandra.utils.ExecutorUtils; -import org.apache.cassandra.repair.state.CoordinatorState; -import org.apache.cassandra.repair.state.ParticipateState; -import org.apache.cassandra.repair.state.ValidationState; -import org.apache.cassandra.utils.Simulate; -import org.apache.cassandra.streaming.PreviewKind; -import org.apache.cassandra.utils.TimeUUID; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.IPartitioner; @@ -86,6 +71,8 @@ import org.apache.cassandra.gms.IEndpointStateChangeSubscriber; import org.apache.cassandra.gms.IFailureDetectionEventListener; import org.apache.cassandra.gms.VersionedValue; +import org.apache.cassandra.locator.EndpointsByRange; +import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.InetAddressAndPort; 
import org.apache.cassandra.metrics.RepairMetrics; import org.apache.cassandra.net.Message; @@ -96,6 +83,7 @@ import org.apache.cassandra.repair.RepairJobDesc; import org.apache.cassandra.repair.RepairParallelism; import org.apache.cassandra.repair.RepairSession; +import org.apache.cassandra.repair.Scheduler; import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.repair.consistent.CoordinatorSessions; import org.apache.cassandra.repair.consistent.LocalSessions; @@ -110,6 +98,9 @@ import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.repair.messages.SyncResponse; import org.apache.cassandra.repair.messages.ValidationResponse; +import org.apache.cassandra.repair.state.CoordinatorState; +import org.apache.cassandra.repair.state.ParticipateState; +import org.apache.cassandra.repair.state.ValidationState; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; @@ -118,8 +109,12 @@ import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.MerkleTrees; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.Simulate; +import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.FutureCombiner; diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 5d22ba4c058a..8ef0c7fc8b2b 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1147,7 +1147,6 @@ public void 
doAutoRepairSetup() } } - public boolean isAuthSetupComplete() { return authSetupComplete; diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 3056b03f5fbc..3e025df1652e 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -93,8 +93,8 @@ public int execute(String... args) List> commands = newArrayList( AbortBootstrap.class, AlterTopology.class, - AutoRepairStatus.class, Assassinate.class, + AutoRepairStatus.class, CassHelp.class, CIDRFilteringStats.class, Cleanup.class, diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index fd7d24c1e502..a4493f30a201 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -65,7 +65,6 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; -import com.vdurmont.semver4j.Semver; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 2bd1ae43116f..eb41f5b864d6 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -57,14 +57,6 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; -import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.gms.ApplicationState; -import org.apache.cassandra.gms.Gossiper; -import org.apache.cassandra.gms.VersionedValue; -import org.apache.cassandra.io.util.File; - -import org.apache.cassandra.config.Config; -import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.slf4j.Logger; @@ 
-140,6 +132,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaCollection; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index a7cbf41a99fd..5d376dcf0721 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1172,6 +1172,7 @@ private static String tableParametersCql() " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + " AND memtable = 'default'\n" + " AND crc_check_chance = 1.0\n" + + " AND fast_path = 'keyspace'\n" + " AND default_time_to_live = 0\n" + " AND extensions = {}\n" + " AND gc_grace_seconds = 864000\n" + diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java index 8c3127ee3097..8e6a559d3343 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -81,7 +81,6 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.spy; @@ -814,7 +813,7 @@ public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() { RepairOption options = new RepairOption(RepairParallelism.PARALLEL, 
true, repairType == AutoRepairConfig.RepairType.INCREMENTAL, false, AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Collections.emptySet(), - false, false, PreviewKind.NONE, false, true, false, false, false, false); + false, false, PreviewKind.NONE, false, true, true, false, false, false); AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); AutoRepairState spyState = spy(repairState); AtomicReference failingListener = new AtomicReference<>(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java index 51551e46369d..1bf8e52f9849 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java @@ -59,13 +59,14 @@ public void testSchedulerEnabledShouldReturnColumnReturned() public void helperTestTableProperty(boolean autoRepairOn) { DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(autoRepairOn); + DatabaseDescriptor.setAccordTransactionsEnabled(false); Map systemSchemaTables = Map.of(SchemaKeyspaceTables.TABLES, "table_name", SchemaKeyspaceTables.VIEWS, "view_name"); for (Map.Entry systemSchema : systemSchemaTables.entrySet()) { ColumnFamilyStore tables = Keyspace.open(SchemaConstants.SCHEMA_KEYSPACE_NAME).getColumnFamilyStore(systemSchema.getKey()); SimpleBuilders.RowBuilder builder = new SimpleBuilders.RowBuilder(tables.metadata(), systemSchema.getValue()); - SchemaKeyspace.addTableParamsToRowBuilder(tables.metadata().params, builder); + SchemaKeyspace.addTableParamsToRowBuilder(tables.metadata().params, builder, false); Row row = builder.build(); ColumnMetadata autoRepair = tables.metadata().getColumn(ByteBufferUtil.bytes("auto_repair")); ColumnData data = row.getCell(autoRepair); diff --git 
a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java index b800b213bbb6..d9723ea193dd 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -35,7 +35,6 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.Locator; import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; import org.apache.cassandra.repair.autorepair.AutoRepairUtils.CurrentRepairStatus; @@ -44,7 +43,6 @@ import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.utils.FBUtilities; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java index f727a3a2c44b..79fef533f18f 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -83,7 +83,7 @@ public static void setUpClass() { CQLTester.setUpClass(); AutoRepairService.setup(); - FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumToken()); + FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumTokenForSplitting()); } @Before diff --git 
a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index 455f5194444a..5293149b1b59 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -37,10 +37,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; -import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; -import org.apache.cassandra.utils.TimeUUID; -import org.apache.cassandra.utils.concurrent.Condition; - import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -66,6 +62,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; import org.apache.cassandra.service.snapshot.TableSnapshot; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tcm.ClusterMetadata; @@ -74,6 +71,8 @@ import org.apache.cassandra.tcm.transformations.Register; import org.apache.cassandra.tcm.transformations.UnsafeJoin; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.Refs; import org.mockito.Mock; diff --git a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java index 9a6a0bda404d..7814b82ba08a 100644 --- a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java +++ b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java @@ -603,6 +603,7 @@ public void testAuditLogEnableLoggerTransitions() throws Exception assertTrue(AuditLogManager.instance.isEnabled()); StorageService.instance.disableAuditLog(); } + /** Create a new 
AuditLogOptions instance with the log dir set appropriately to a temp dir for unit testing. */ From 9e9c035a806f10c5fe8c1e997cb0367349939467 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:03:26 -0700 Subject: [PATCH 250/257] Update doc/modules/cassandra/pages/managing/operating/metrics.adoc Co-authored-by: Francisco Guerrero --- doc/modules/cassandra/pages/managing/operating/metrics.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 7294a3675e82..fe82c8421edd 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1143,7 +1143,7 @@ delayed because a replica was currently taking its turn. Only revelent if `allow_parallel_replica_repair` is false. |RepairDelayedBySchedule |Counter |Represents occurrences of a node's turn being -delayed because it was already being repaired in another schedule. Only relevent +delayed because it was already being repaired in another schedule. Only relevant if `allow_parallel_replica_repair_across_schedules` is false. 
|=== From eff68dbd4b6e017ef7570066632518c3a3f8f338 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:03:45 -0700 Subject: [PATCH 251/257] Update src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java Co-authored-by: Francisco Guerrero --- .../cassandra/repair/autorepair/RepairTokenRangeSplitter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 6f522b4197d4..8f4782312c92 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -93,7 +93,7 @@ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter private static final Logger logger = LoggerFactory.getLogger(RepairTokenRangeSplitter.class); // Default max bytes to 100TiB, which is much more readable than Long.MAX_VALUE - private static final DataStorageSpec.LongBytesBound MAX_BYTES = new DataStorageSpec.LongBytesBound(100_000, DataStorageSpec.DataStorageUnit.GIBIBYTES); + private static final DataStorageSpec.LongBytesBound MAX_BYTES = new DataStorageSpec.LongBytesBound(102_400, DataStorageSpec.DataStorageUnit.GIBIBYTES); /** * The target bytes that should be included in a repair assignment From 44a16947d0e0061cc0e56570aaa4b087e6c64b4b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:04:30 -0700 Subject: [PATCH 252/257] Update src/java/org/apache/cassandra/schema/AutoRepairParams.java Co-authored-by: Francisco Guerrero --- src/java/org/apache/cassandra/schema/AutoRepairParams.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index a1111a84c51a..de8896911a5b 100644 --- 
a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -71,11 +71,7 @@ public String toString() public static AutoRepairParams create(Map options) { - Map optionsMap = new TreeMap<>(); - for (Map.Entry entry : DEFAULT_OPTIONS.entrySet()) - { - optionsMap.put(entry.getKey(), entry.getValue()); - } + Map optionsMap = new TreeMap<>(DEFAULT_OPTIONS); if (options != null) { for (Map.Entry entry : options.entrySet()) From a957d90991d40db315701cf9a13abae5220855d0 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:04:52 -0700 Subject: [PATCH 253/257] Update src/java/org/apache/cassandra/schema/AutoRepairParams.java Co-authored-by: Francisco Guerrero --- src/java/org/apache/cassandra/schema/AutoRepairParams.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index de8896911a5b..a730e6597cab 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -97,10 +97,8 @@ public boolean repairEnabled(AutoRepairConfig.RepairType type) public int priority() { - String priority = options.get(Option.PRIORITY.toString()); - return priority == null - ? 
Integer.parseInt(DEFAULT_OPTIONS.get(Option.PRIORITY.toString())) - : Integer.parseInt(priority); + String priority = options.getOrDefault(Option.PRIORITY.toString(), DEFAULT_OPTIONS.get(Option.PRIORITY.toString())); + return Integer.parseInt(priority); } public void validate() From 0abf12a2ef6429bf4804701f5ffed1bb2e8849c5 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:06:14 -0700 Subject: [PATCH 254/257] Update src/java/org/apache/cassandra/schema/AutoRepairParams.java Co-authored-by: Francisco Guerrero --- src/java/org/apache/cassandra/schema/AutoRepairParams.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java index a730e6597cab..1fe80f766f3e 100644 --- a/src/java/org/apache/cassandra/schema/AutoRepairParams.java +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -89,10 +89,8 @@ public static AutoRepairParams create(Map options) public boolean repairEnabled(AutoRepairConfig.RepairType type) { String option = LocalizeString.toLowerCaseLocalized(type.toString()) + "_enabled"; - String enabled = options.get(option); - return enabled == null - ? 
Boolean.parseBoolean(DEFAULT_OPTIONS.get(option)) - : Boolean.parseBoolean(enabled); + String enabled = options.getOrDefault(option, DEFAULT_OPTIONS.get(option)); + return Boolean.parseBoolean(enabled); } public int priority() From 5f4a938520e9e35fe19fd69af224b5696bfc90f7 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:37:42 -0700 Subject: [PATCH 255/257] Validate disk headroom for IR and a couple of comment updates --- .../cassandra/config/DatabaseDescriptor.java | 3 +++ .../autorepair/RepairTokenRangeSplitter.java | 17 +++++++++++++++++ .../schema/SystemDistributedKeyspace.java | 1 + 3 files changed, 21 insertions(+) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index de3e13099b21..59e6c1c7259c 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5924,6 +5924,9 @@ public static double getIncrementalRepairDiskHeadroomRejectRatio() public static void setIncrementalRepairDiskHeadroomRejectRatio(double value) { + if (value < 0.0 || value > 1.0) { + throw new IllegalArgumentException("Value must be >= 0 and <= 1 for incremental_repair_disk_headroom_reject_ratio"); + } conf.incremental_repair_disk_headroom_reject_ratio = value; } diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java index 6f522b4197d4..90917dbcd53b 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -87,6 +87,23 @@ * shall be sensible for all {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}'s. The following * configuration parameters are offered. *

+ *
+ * <p>Configuration parameters:</p>
+ * <ul>
+ *   <li>bytes_per_assignment – Target size (in compressed bytes) for each repair. Throttles incremental repair
+ *   and anticompaction per schedule after incremental repairs are enabled.</li>
+ *
+ *   <li>max_bytes_per_schedule – Maximum data (in compressed bytes) to cover in a single schedule. Acts as a
+ *   throttle for the repair cycle workload. Tune this up if writes are outpacing repair, or down if repairs are too
+ *   disruptive. Alternatively, adjust {@code min_repair_interval}.</li>
+ *
+ *   <li>partitions_per_assignment – Maximum number of partitions per repair assignment. Limits the number of
+ *   partitions in Merkle tree leaves to prevent overstreaming.</li>
+ *
+ *   <li>max_tables_per_assignment – Maximum number of tables to include in a single repair assignment.
+ *   Especially useful for keyspaces with many tables. Prevents excessive batching of tables that exceed other
+ *   parameters like {@code bytes_per_assignment} or {@code partitions_per_assignment}.</li>
+ * </ul>
        */ public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter { diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index a1d03bbf8e03..d50621a3a15c 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -82,6 +82,7 @@ private SystemDistributedKeyspace() * gen 4: compression chunk length reduced to 16KiB, memtable_flush_period_in_ms now unset on all tables in 4.0 * gen 5: add ttl and TWCS to repair_history tables * gen 6: add denylist table + * gen 7: add auto_repair_history and auto_repair_priority tables for AutoRepair feature * * // TODO: TCM - how do we evolve these tables? */ From ab6c770178bbf64094f3df26ba6138dda894c481 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 15:43:35 -0700 Subject: [PATCH 256/257] Format { on the new line --- src/java/org/apache/cassandra/config/DatabaseDescriptor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 59e6c1c7259c..77aa3bcace07 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5924,7 +5924,8 @@ public static double getIncrementalRepairDiskHeadroomRejectRatio() public static void setIncrementalRepairDiskHeadroomRejectRatio(double value) { - if (value < 0.0 || value > 1.0) { + if (value < 0.0 || value > 1.0) + { throw new IllegalArgumentException("Value must be >= 0 and <= 1 for incremental_repair_disk_headroom_reject_ratio"); } conf.incremental_repair_disk_headroom_reject_ratio = value; From 34bf40666321558e44bd591aed96f086cab78ed6 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 21 Apr 2025 18:59:34 -0700 Subject: [PATCH 
257/257] Update doc/modules/cassandra/pages/managing/operating/metrics.adoc Co-authored-by: Francisco Guerrero --- doc/modules/cassandra/pages/managing/operating/metrics.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index fe82c8421edd..6fba0f811711 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -1139,7 +1139,7 @@ the automated repair has been disabled on the node due to priority set in the automated repair |RepairDelayedByReplica |Counter |Represents occurrences of a node's turn being -delayed because a replica was currently taking its turn. Only revelent if +delayed because a replica was currently taking its turn. Only relevant if `allow_parallel_replica_repair` is false. |RepairDelayedBySchedule |Counter |Represents occurrences of a node's turn being