From ef226dfa8eca537830f78f4f3dfb045cd2729d44 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Fri, 19 Dec 2025 12:20:08 +0000 Subject: [PATCH 1/4] Minor perf optimizations around memtable put logic Avoid BTree iterators allocation in ColumnsCollector.update reduce amount of virtual calls for Cell.dataSize avoid capturing lambda allocation in BTreeRow.clone do not recalculate minLocalDeletionTime when BTreeRow is cloned Patch by Dmitry Konstantinov; reviewed by TBD for CASSANDRA-21088 --- src/java/org/apache/cassandra/db/Columns.java | 6 ++ .../db/memtable/AbstractMemtable.java | 7 +- .../cassandra/db/rows/AbstractCell.java | 7 +- .../apache/cassandra/db/rows/BTreeRow.java | 4 +- .../apache/cassandra/db/rows/NativeCell.java | 17 +++++ .../apache/cassandra/utils/btree/BTree.java | 75 +++++++++++++++++++ 6 files changed, 107 insertions(+), 9 deletions(-) diff --git a/src/java/org/apache/cassandra/db/Columns.java b/src/java/org/apache/cassandra/db/Columns.java index 3e014d3254f7..153b865d2390 100644 --- a/src/java/org/apache/cassandra/db/Columns.java +++ b/src/java/org/apache/cassandra/db/Columns.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.*; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Predicate; @@ -426,6 +427,11 @@ public void apply(Consumer function) BTree.apply(columns, function); } + public void apply(BiConsumer function, V argument) + { + BTree.apply(columns, function, argument); + } + @Override public boolean equals(Object other) { diff --git a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java index 8983979a9112..6bd1b08db896 100644 --- a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java +++ b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java @@ -196,10 +196,9 @@ protected static class ColumnsCollector public void 
update(RegularAndStaticColumns columns) { - for (ColumnMetadata s : columns.statics) - update(s); - for (ColumnMetadata r : columns.regulars) - update(r); + if (!columns.statics.isEmpty()) + columns.statics.apply(ColumnsCollector::update, this); + columns.regulars.apply(ColumnsCollector::update, this); } public void update(ColumnsCollector other) diff --git a/src/java/org/apache/cassandra/db/rows/AbstractCell.java b/src/java/org/apache/cassandra/db/rows/AbstractCell.java index aa5536593dcc..d8adb0683e17 100644 --- a/src/java/org/apache/cassandra/db/rows/AbstractCell.java +++ b/src/java/org/apache/cassandra/db/rows/AbstractCell.java @@ -149,9 +149,10 @@ public Cell updateAllTimesWithNewCellPathForComplexColumnData(@Nonnull CellPa public int dataSize() { CellPath path = path(); - return TypeSizes.sizeof(timestamp()) - + TypeSizes.sizeof(ttl()) - + TypeSizes.sizeof(localDeletionTime()) + // NOTE: TypeSizes.sizeof(localDeletionTime()) - method call like this is not eliminated by JIT in case of a megamorphic call + return TypeSizes.LONG_SIZE // timestamp() + + TypeSizes.INT_SIZE // ttl() + + TypeSizes.LONG_SIZE // localDeletionTime() + valueSize() + (path == null ? 
0 : path.dataSize()); } diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java index bf6b5b5061c1..7020f96b479b 100644 --- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java +++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java @@ -525,8 +525,8 @@ public Row transform(Function function) @Override public Row clone(Cloner cloner) { - Object[] tree = BTree.transform(btree, c -> c.clone(cloner)); - return BTreeRow.create(cloner.clone(clustering), primaryKeyLivenessInfo, deletion, tree); + Object[] tree = BTree.transform(btree, ColumnData::clone, cloner); + return BTreeRow.create(cloner.clone(clustering), primaryKeyLivenessInfo, deletion, tree, minLocalDeletionTime); } public int dataSize() diff --git a/src/java/org/apache/cassandra/db/rows/NativeCell.java b/src/java/org/apache/cassandra/db/rows/NativeCell.java index 56301cfa7a49..2c77583ec61e 100644 --- a/src/java/org/apache/cassandra/db/rows/NativeCell.java +++ b/src/java/org/apache/cassandra/db/rows/NativeCell.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AddressBasedNativeData; import org.apache.cassandra.db.marshal.ByteArrayAccessor; import org.apache.cassandra.db.marshal.NativeAccessor; @@ -198,6 +199,16 @@ public int valueSize() return NativeEndianMemoryUtil.getInt(peer + LENGTH); } + public int dataSize() + { + // NOTE: TypeSizes.sizeof(localDeletionTime()) - method calls like these are not eliminated by JIT in case of a megamorphic call + return TypeSizes.LONG_SIZE // timestamp() + + TypeSizes.INT_SIZE // ttl() + + TypeSizes.LONG_SIZE // localDeletionTime() + + valueSize() + + (hasPath() ? 
pathDataSize() : 0); + } + public CellPath path() { if (!hasPath()) @@ -208,6 +219,12 @@ public CellPath path() return CellPath.create(MemoryUtil.getByteBuffer(offset + 4, size, ByteOrder.BIG_ENDIAN)); } + private int pathDataSize() + { + long offset = getAddress() + valueSize(); + return NativeEndianMemoryUtil.getInt(offset); + } + public Cell withUpdatedValue(ByteBuffer newValue) { throw new UnsupportedOperationException(); diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java b/src/java/org/apache/cassandra/utils/btree/BTree.java index 0f84b6a5a906..5306a7bdb7f6 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTree.java +++ b/src/java/org/apache/cassandra/utils/btree/BTree.java @@ -1273,6 +1273,7 @@ private static Object[] transformAndFilterLeaf(Object[] leaf, BiFunct * The result of any transformation must sort identically as their originals, wrt other results. *

* If no modifications are made, the original is returned. + * NOTE: codewise *identical* to {@link #transform(Object[], BiFunction, Object)} */ public static Object[] transform(Object[] tree, Function function) { @@ -1316,6 +1317,55 @@ public static Object[] transform(Object[] tree, Function + * If no modifications are made, the original is returned. + * NOTE: codewise *identical* to {@link #transform(Object[], Function)} + */ + public static Object[] transform(Object[] tree, BiFunction function, I2 param) + { + if (isEmpty(tree)) // isEmpty determined by identity; must return input + return tree; + + if (isLeaf(tree)) // escape hatch for fast leaf transformation + return transformLeaf(tree, function, param); + + Object[] result = tree; // optimistically assume we'll return our input unmodified + int keyCount = shallowSizeOfBranch(tree); + for (int i = 0; i < keyCount; ++i) + { + // operate on a pair of (child,key) each loop + Object[] curChild = (Object[]) tree[keyCount + i]; + Object[] updChild = transform(curChild, function, param); + Object curKey = tree[i]; + Object updKey = function.apply((I) curKey, param); + if (result == tree) + { + if (curChild == updChild && curKey == updKey) + continue; // if output still same as input, loop + + // otherwise initialise output to a copy of input up to this point + result = transformCopyBranchHelper(tree, keyCount, i, i); + } + result[keyCount + i] = updChild; + result[i] = updKey; + } + // final unrolled copy of loop for last child only (unbalanced with keys) + Object[] curChild = (Object[]) tree[2 * keyCount]; + Object[] updChild = transform(curChild, function, param); + if (result == tree) + { + if (curChild == updChild) + return tree; + result = transformCopyBranchHelper(tree, keyCount, keyCount, keyCount); + } + result[2 * keyCount] = updChild; + result[2 * keyCount + 1] = tree[2 * keyCount + 1]; // take the original sizeMap, as we are exactly the same shape + return result; + } + // create a copy of a branch, with 
the exact same size, copying the specified number of keys and children private static Object[] transformCopyBranchHelper(Object[] branch, int keyCount, int copyKeyCount, int copyChildCount) { @@ -1326,6 +1376,7 @@ private static Object[] transformCopyBranchHelper(Object[] branch, int keyCount, } // an efficient transformAndFilter implementation suitable for a tree consisting of a single leaf root + // NOTE: codewise *identical* to {@link #transformLeaf(Object[], BiFunction, Object)} private static Object[] transformLeaf(Object[] leaf, Function apply) { Object[] result = leaf; // optimistically assume we'll return our input unmodified @@ -1348,6 +1399,30 @@ private static Object[] transformLeaf(Object[] leaf, Function Object[] transformLeaf(Object[] leaf, BiFunction apply, I2 param) + { + Object[] result = leaf; // optimistically assume we'll return our input unmodified + int size = sizeOfLeaf(leaf); + for (int i = 0; i < size; ++i) + { + Object current = leaf[i]; + Object updated = apply.apply((I) current, param); + if (result == leaf) + { + if (current == updated) + continue; // if output still same as input, loop + + // otherwise initialise output to a copy of input up to this point + result = new Object[leaf.length]; + System.arraycopy(leaf, 0, result, 0, i); + } + result[i] = updated; + } + return result; + } + public static boolean equals(Object[] a, Object[] b) { return size(a) == size(b) && Iterators.elementsEqual(iterator(a), iterator(b)); From f0d0d3c70d6c8712d06a343603beb0a7c1463fc2 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Fri, 19 Dec 2025 12:57:02 +0000 Subject: [PATCH 2/4] avoid CAS write if EncodingStats is not changed after merge --- src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java index 6bd1b08db896..9c6aa931812d 100644 --- 
a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java +++ b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java @@ -252,6 +252,8 @@ public void update(EncodingStats newStats) { EncodingStats current = stats.get(); EncodingStats updated = current.mergeWith(newStats); + if (current == updated) + return; if (stats.compareAndSet(current, updated)) return; } From 2c4274372685eea6a547438a4932dd28ab063d35 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Sat, 20 Dec 2025 15:41:58 +0000 Subject: [PATCH 3/4] just use a method reference, an apply with an extra function parameter is not needed here --- src/java/org/apache/cassandra/db/Columns.java | 6 -- .../db/memtable/AbstractMemtable.java | 4 +- .../apache/cassandra/utils/btree/BTree.java | 75 ------------------- 3 files changed, 2 insertions(+), 83 deletions(-) diff --git a/src/java/org/apache/cassandra/db/Columns.java b/src/java/org/apache/cassandra/db/Columns.java index 153b865d2390..3e014d3254f7 100644 --- a/src/java/org/apache/cassandra/db/Columns.java +++ b/src/java/org/apache/cassandra/db/Columns.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.*; -import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Predicate; @@ -427,11 +426,6 @@ public void apply(Consumer function) BTree.apply(columns, function); } - public void apply(BiConsumer function, V argument) - { - BTree.apply(columns, function, argument); - } - @Override public boolean equals(Object other) { diff --git a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java index 9c6aa931812d..b5e560961e6b 100644 --- a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java +++ b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java @@ -197,8 +197,8 @@ protected static class ColumnsCollector public void update(RegularAndStaticColumns columns) { if 
(!columns.statics.isEmpty()) - columns.statics.apply(ColumnsCollector::update, this); - columns.regulars.apply(ColumnsCollector::update, this); + columns.statics.apply(this::update); + columns.regulars.apply(this::update); } public void update(ColumnsCollector other) diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java b/src/java/org/apache/cassandra/utils/btree/BTree.java index 5306a7bdb7f6..0f84b6a5a906 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTree.java +++ b/src/java/org/apache/cassandra/utils/btree/BTree.java @@ -1273,7 +1273,6 @@ private static Object[] transformAndFilterLeaf(Object[] leaf, BiFunct * The result of any transformation must sort identically as their originals, wrt other results. *

* If no modifications are made, the original is returned. - * NOTE: codewise *identical* to {@link #transform(Object[], BiFunction, Object)} */ public static Object[] transform(Object[] tree, Function function) { @@ -1317,55 +1316,6 @@ public static Object[] transform(Object[] tree, Function - * If no modifications are made, the original is returned. - * NOTE: codewise *identical* to {@link #transform(Object[], Function)} - */ - public static Object[] transform(Object[] tree, BiFunction function, I2 param) - { - if (isEmpty(tree)) // isEmpty determined by identity; must return input - return tree; - - if (isLeaf(tree)) // escape hatch for fast leaf transformation - return transformLeaf(tree, function, param); - - Object[] result = tree; // optimistically assume we'll return our input unmodified - int keyCount = shallowSizeOfBranch(tree); - for (int i = 0; i < keyCount; ++i) - { - // operate on a pair of (child,key) each loop - Object[] curChild = (Object[]) tree[keyCount + i]; - Object[] updChild = transform(curChild, function, param); - Object curKey = tree[i]; - Object updKey = function.apply((I) curKey, param); - if (result == tree) - { - if (curChild == updChild && curKey == updKey) - continue; // if output still same as input, loop - - // otherwise initialise output to a copy of input up to this point - result = transformCopyBranchHelper(tree, keyCount, i, i); - } - result[keyCount + i] = updChild; - result[i] = updKey; - } - // final unrolled copy of loop for last child only (unbalanced with keys) - Object[] curChild = (Object[]) tree[2 * keyCount]; - Object[] updChild = transform(curChild, function, param); - if (result == tree) - { - if (curChild == updChild) - return tree; - result = transformCopyBranchHelper(tree, keyCount, keyCount, keyCount); - } - result[2 * keyCount] = updChild; - result[2 * keyCount + 1] = tree[2 * keyCount + 1]; // take the original sizeMap, as we are exactly the same shape - return result; - } - // create a copy of a branch, with 
the exact same size, copying the specified number of keys and children private static Object[] transformCopyBranchHelper(Object[] branch, int keyCount, int copyKeyCount, int copyChildCount) { @@ -1376,7 +1326,6 @@ private static Object[] transformCopyBranchHelper(Object[] branch, int keyCount, } // an efficient transformAndFilter implementation suitable for a tree consisting of a single leaf root - // NOTE: codewise *identical* to {@link #transformLeaf(Object[], BiFunction, Object)} private static Object[] transformLeaf(Object[] leaf, Function apply) { Object[] result = leaf; // optimistically assume we'll return our input unmodified @@ -1399,30 +1348,6 @@ private static Object[] transformLeaf(Object[] leaf, Function Object[] transformLeaf(Object[] leaf, BiFunction apply, I2 param) - { - Object[] result = leaf; // optimistically assume we'll return our input unmodified - int size = sizeOfLeaf(leaf); - for (int i = 0; i < size; ++i) - { - Object current = leaf[i]; - Object updated = apply.apply((I) current, param); - if (result == leaf) - { - if (current == updated) - continue; // if output still same as input, loop - - // otherwise initialise output to a copy of input up to this point - result = new Object[leaf.length]; - System.arraycopy(leaf, 0, result, 0, i); - } - result[i] = updated; - } - return result; - } - public static boolean equals(Object[] a, Object[] b) { return size(a) == size(b) && Iterators.elementsEqual(iterator(a), iterator(b)); From 9b24d7d2e1fc9439e5341d2c3e4d35834aa177fa Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Sat, 20 Dec 2025 15:51:00 +0000 Subject: [PATCH 4/4] transform with argument is still needed for BTreeRow clone logic --- .../cassandra/db/rows/ComplexColumnData.java | 2 +- .../apache/cassandra/utils/btree/BTree.java | 75 +++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java 
b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java index 20c4045bac47..8de4cc9943e8 100644 --- a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java +++ b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java @@ -257,7 +257,7 @@ public ComplexColumnData transform(Function, ? extends Cell< @Override public ColumnData clone(Cloner cloner) { - return transform(c -> cloner.clone(c)); + return transform(cloner::clone); } public int estimateCloneSize(Cloner cloner) diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java b/src/java/org/apache/cassandra/utils/btree/BTree.java index 0f84b6a5a906..5306a7bdb7f6 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTree.java +++ b/src/java/org/apache/cassandra/utils/btree/BTree.java @@ -1273,6 +1273,7 @@ private static Object[] transformAndFilterLeaf(Object[] leaf, BiFunct * The result of any transformation must sort identically as their originals, wrt other results. *

* If no modifications are made, the original is returned. + * NOTE: codewise *identical* to {@link #transform(Object[], BiFunction, Object)} */ public static Object[] transform(Object[] tree, Function function) { @@ -1316,6 +1317,55 @@ public static Object[] transform(Object[] tree, Function + * If no modifications are made, the original is returned. + * NOTE: codewise *identical* to {@link #transform(Object[], Function)} + */ + public static Object[] transform(Object[] tree, BiFunction function, I2 param) + { + if (isEmpty(tree)) // isEmpty determined by identity; must return input + return tree; + + if (isLeaf(tree)) // escape hatch for fast leaf transformation + return transformLeaf(tree, function, param); + + Object[] result = tree; // optimistically assume we'll return our input unmodified + int keyCount = shallowSizeOfBranch(tree); + for (int i = 0; i < keyCount; ++i) + { + // operate on a pair of (child,key) each loop + Object[] curChild = (Object[]) tree[keyCount + i]; + Object[] updChild = transform(curChild, function, param); + Object curKey = tree[i]; + Object updKey = function.apply((I) curKey, param); + if (result == tree) + { + if (curChild == updChild && curKey == updKey) + continue; // if output still same as input, loop + + // otherwise initialise output to a copy of input up to this point + result = transformCopyBranchHelper(tree, keyCount, i, i); + } + result[keyCount + i] = updChild; + result[i] = updKey; + } + // final unrolled copy of loop for last child only (unbalanced with keys) + Object[] curChild = (Object[]) tree[2 * keyCount]; + Object[] updChild = transform(curChild, function, param); + if (result == tree) + { + if (curChild == updChild) + return tree; + result = transformCopyBranchHelper(tree, keyCount, keyCount, keyCount); + } + result[2 * keyCount] = updChild; + result[2 * keyCount + 1] = tree[2 * keyCount + 1]; // take the original sizeMap, as we are exactly the same shape + return result; + } + // create a copy of a branch, with 
the exact same size, copying the specified number of keys and children private static Object[] transformCopyBranchHelper(Object[] branch, int keyCount, int copyKeyCount, int copyChildCount) { @@ -1326,6 +1376,7 @@ private static Object[] transformCopyBranchHelper(Object[] branch, int keyCount, } // an efficient transformAndFilter implementation suitable for a tree consisting of a single leaf root + // NOTE: codewise *identical* to {@link #transformLeaf(Object[], BiFunction, Object)} private static Object[] transformLeaf(Object[] leaf, Function apply) { Object[] result = leaf; // optimistically assume we'll return our input unmodified @@ -1348,6 +1399,30 @@ private static Object[] transformLeaf(Object[] leaf, Function Object[] transformLeaf(Object[] leaf, BiFunction apply, I2 param) + { + Object[] result = leaf; // optimistically assume we'll return our input unmodified + int size = sizeOfLeaf(leaf); + for (int i = 0; i < size; ++i) + { + Object current = leaf[i]; + Object updated = apply.apply((I) current, param); + if (result == leaf) + { + if (current == updated) + continue; // if output still same as input, loop + + // otherwise initialise output to a copy of input up to this point + result = new Object[leaf.length]; + System.arraycopy(leaf, 0, result, 0, i); + } + result[i] = updated; + } + return result; + } + public static boolean equals(Object[] a, Object[] b) { return size(a) == size(b) && Iterators.elementsEqual(iterator(a), iterator(b));