Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
package org.apache.hadoop.hbase.regionserver.querymatcher;

import java.io.IOException;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.ScanInfo;
Expand All @@ -31,6 +34,14 @@
@InterfaceAudience.Private
public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {

/**
* Number of consecutive range delete markers (DeleteColumn/DeleteFamily) to skip before switching
* to seek. Seeking is more expensive than skipping for a single marker, but much faster when
* markers accumulate. This threshold avoids the seek overhead for the common case (one delete per
* row/column) while still kicking in when markers pile up.
*/
static final int SEEK_ON_DELETE_MARKER_THRESHOLD = 10;

/** Keeps track of deletes */
private final DeleteTracker deletes;

Expand All @@ -40,18 +51,32 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
/** whether time range queries can see rows "behind" a delete */
protected final boolean seePastDeleteMarkers;

/** Whether seek optimization for range delete markers is applicable */
private final boolean canSeekOnDeleteMarker;

/** Count of consecutive range delete markers seen for the same column */
private int rangeDeleteCount;

/** Last range delete cell, for qualifier comparison across consecutive markers */
private ExtendedCell lastDelete;

protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) {
super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now);
this.deletes = deletes;
this.get = scan.isGetScan();
this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE;
this.canSeekOnDeleteMarker =
!seePastDeleteMarkers && deletes.getClass() == ScanDeleteTracker.class;
}

@Override
public void beforeShipped() throws IOException {
super.beforeShipped();
deletes.beforeShipped();
if (lastDelete != null) {
lastDelete = KeyValueUtil.toNewKeyCell(lastDelete);
}
}

@Override
Expand All @@ -71,8 +96,31 @@ public MatchCode match(ExtendedCell cell) throws IOException {
if (includeDeleteMarker) {
this.deletes.add(cell);
}

// A DeleteColumn or DeleteFamily masks all remaining cells for this column/family.
// Seek past them instead of skipping one cell at a time, but only after seeing
// enough consecutive markers for the same column to justify the seek overhead.
// Only safe with plain ScanDeleteTracker. Not safe with newVersionBehavior (sequence
// IDs determine visibility), visibility labels (delete/put label mismatch), or
// seePastDeleteMarkers (KEEP_DELETED_CELLS).
if (
canSeekOnDeleteMarker && (typeByte == KeyValue.Type.DeleteFamily.getCode()
|| (typeByte == KeyValue.Type.DeleteColumn.getCode() && cell.getQualifierLength() > 0))
) {
if (lastDelete != null && !CellUtil.matchingQualifier(cell, lastDelete)) {
rangeDeleteCount = 0;
}
lastDelete = cell;
if (++rangeDeleteCount >= SEEK_ON_DELETE_MARKER_THRESHOLD) {
rangeDeleteCount = 0;
return columns.getNextRowOrNextColumn(cell);
}
} else {
rangeDeleteCount = 0;
}
return MatchCode.SKIP;
}
rangeDeleteCount = 0;
returnCode = checkDeleted(deletes, cell);
if (returnCode != null) {
return returnCode;
Expand All @@ -83,6 +131,8 @@ public MatchCode match(ExtendedCell cell) throws IOException {
@Override
protected void reset() {
deletes.reset();
rangeDeleteCount = 0;
lastDelete = null;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,11 @@ public void setToNewRow(ExtendedCell currentRow) {
public abstract boolean moreRowsMayExistAfter(ExtendedCell cell);

public ExtendedCell getKeyForNextColumn(ExtendedCell cell) {
// We aren't sure whether any DeleteFamily cells exist, so we can't skip to next column.
// TODO: Current way disable us to seek to next column quickly. Is there any better solution?
// see HBASE-18471 for more details
// see TestFromClientSide3#testScanAfterDeletingSpecifiedRow
// see TestFromClientSide3#testScanAfterDeletingSpecifiedRowV2
if (cell.getQualifierLength() == 0) {
// For cells with empty qualifier, we generally can't skip to the next column because
// DeleteFamily cells might exist that we haven't seen yet (see HBASE-18471).
// However, if the cell itself IS a DeleteFamily marker, we know we've already processed it,
// so we can safely seek to the next real column.
if (cell.getQualifierLength() == 0 && !PrivateCellUtil.isDeleteFamily(cell)) {
ExtendedCell nextKey = PrivateCellUtil.createNextOnRowCol(cell);
if (nextKey != cell) {
return nextKey;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterBase;
Expand Down Expand Up @@ -396,4 +397,144 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE,
Cell nextCell = qm.getKeyForNextColumn(lastCell);
assertArrayEquals(nextCell.getQualifierArray(), col4);
}

/**
* After enough consecutive range delete markers, the matcher should switch from SKIP to
* SEEK_NEXT_COL. Point deletes and KEEP_DELETED_CELLS always SKIP.
*/
@Test
public void testSeekOnRangeDelete() throws IOException {
int n = NormalUserScanQueryMatcher.SEEK_ON_DELETE_MARKER_THRESHOLD;

// DeleteColumn: first N-1 SKIP, N-th triggers SEEK_NEXT_COL
assertSeekAfterThreshold(KeepDeletedCells.FALSE, Type.DeleteColumn, n);

// DeleteFamily: same threshold behavior
assertSeekAfterThreshold(KeepDeletedCells.FALSE, Type.DeleteFamily, n);

// Delete (version): always SKIP (point delete, not range)
assertAllSkip(KeepDeletedCells.FALSE, Type.Delete, n + 1);

// KEEP_DELETED_CELLS=TRUE: always SKIP
assertAllSkip(KeepDeletedCells.TRUE, Type.DeleteColumn, n + 1);
}

/**
* DeleteColumn with empty qualifier must not cause seeking past a subsequent DeleteFamily.
* DeleteFamily masks all columns, so it must be tracked by the delete tracker.
*/
@Test
public void testDeleteColumnEmptyQualifierDoesNotSkipDeleteFamily() throws IOException {
long now = EnvironmentEdgeManager.currentTime();
byte[] e = HConstants.EMPTY_BYTE_ARRAY;
UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam1, 0, 1,
ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), null,
now - ttl, now, null);

int n = NormalUserScanQueryMatcher.SEEK_ON_DELETE_MARKER_THRESHOLD;
// Feed DCs with empty qualifier past the threshold, then a DF.
// The DF must NOT be seeked past -- it must be SKIP'd so the tracker picks it up.
qm.setToNewRow(new KeyValue(row1, fam1, e, now, Type.DeleteColumn));
for (int i = 0; i < n + 1; i++) {
// Empty qualifier DCs should never trigger seek, regardless of threshold
assertEquals("DC at i=" + i, MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, e, now - i, Type.DeleteColumn)));
}
KeyValue df = new KeyValue(row1, fam1, e, now - n - 1, Type.DeleteFamily);
KeyValue put = new KeyValue(row1, fam1, col1, now - n - 1, Type.Put, data);
// DF must be processed (SKIP), not seeked past
assertEquals(MatchCode.SKIP, qm.match(df));
// Put in col1 at t=now-3 should be masked by DF@t=now-3
MatchCode putCode = qm.match(put);
assertEquals(MatchCode.SEEK_NEXT_COL, putCode);
}

/**
* DeleteColumn markers for different qualifiers should not accumulate the seek counter. Only
* consecutive markers for the same qualifier should trigger seeking.
*/
@Test
public void testDeleteColumnDifferentQualifiersDoNotSeek() throws IOException {
long now = EnvironmentEdgeManager.currentTime();
UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam1, 0, 1,
ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), null,
now - ttl, now, null);

// DCs for different qualifiers: counter resets on qualifier change, never seeks
qm.setToNewRow(new KeyValue(row1, fam1, col1, now, Type.DeleteColumn));
assertEquals(MatchCode.SKIP, qm.match(new KeyValue(row1, fam1, col1, now, Type.DeleteColumn)));
assertEquals(MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, col2, now - 1, Type.DeleteColumn)));
assertEquals(MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, col3, now - 2, Type.DeleteColumn)));
assertEquals(MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, col4, now - 3, Type.DeleteColumn)));
assertEquals(MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, col5, now - 4, Type.DeleteColumn)));
}

/**
* Delete markers outside the scan's time range (includeDeleteMarker=false) should still
* accumulate the seek counter and trigger SEEK_NEXT_COL after the threshold.
*/
@Test
public void testSeekOnRangeDeleteOutsideTimeRange() throws IOException {
long now = EnvironmentEdgeManager.currentTime();
long futureTs = now + 1_000_000;
Scan scanWithTimeRange = new Scan(scan).setTimeRange(futureTs, Long.MAX_VALUE);

UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithTimeRange,
new ScanInfo(this.conf, fam1, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE,
0, rowComparator, false),
null, now - ttl, now, null);

int n = NormalUserScanQueryMatcher.SEEK_ON_DELETE_MARKER_THRESHOLD;
qm.setToNewRow(new KeyValue(row1, fam1, col1, now, Type.DeleteColumn));
// All DCs have timestamps below the time range, so includeDeleteMarker is false.
// The seek counter should still accumulate.
for (int i = 0; i < n - 1; i++) {
assertEquals("DC at i=" + i, MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, col1, now - i, Type.DeleteColumn)));
}
assertEquals(MatchCode.SEEK_NEXT_COL,
qm.match(new KeyValue(row1, fam1, col1, now - n + 1, Type.DeleteColumn)));
}

private UserScanQueryMatcher createDeleteMatcher(KeepDeletedCells keepDeletedCells)
throws IOException {
long now = EnvironmentEdgeManager.currentTime();
return UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam1, 0, 1, ttl,
keepDeletedCells, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), null, now - ttl,
now, null);
}

/** First n-1 markers SKIP, n-th triggers SEEK_NEXT_COL. */
private void assertSeekAfterThreshold(KeepDeletedCells keepDeletedCells, Type type, int n)
throws IOException {
long now = EnvironmentEdgeManager.currentTime();
UserScanQueryMatcher qm = createDeleteMatcher(keepDeletedCells);
boolean familyLevel = type == Type.DeleteFamily || type == Type.DeleteFamilyVersion;
byte[] qual = familyLevel ? HConstants.EMPTY_BYTE_ARRAY : col1;
qm.setToNewRow(new KeyValue(row1, fam1, qual, now, type));
for (int i = 0; i < n - 1; i++) {
assertEquals("Mismatch at index " + i, MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, qual, now - i, type)));
}
assertEquals("Expected SEEK_NEXT_COL at index " + (n - 1), MatchCode.SEEK_NEXT_COL,
qm.match(new KeyValue(row1, fam1, qual, now - n + 1, type)));
}

/** All markers should SKIP regardless of count. */
private void assertAllSkip(KeepDeletedCells keepDeletedCells, Type type, int count)
throws IOException {
long now = EnvironmentEdgeManager.currentTime();
UserScanQueryMatcher qm = createDeleteMatcher(keepDeletedCells);
boolean familyLevel = type == Type.DeleteFamily || type == Type.DeleteFamilyVersion;
byte[] qual = familyLevel ? HConstants.EMPTY_BYTE_ARRAY : col1;
qm.setToNewRow(new KeyValue(row1, fam1, qual, now, type));
for (int i = 0; i < count; i++) {
assertEquals("Mismatch at index " + i, MatchCode.SKIP,
qm.match(new KeyValue(row1, fam1, qual, now - i, type)));
}
}
}