diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 6e2249e83e3c..2401e5ff8893 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -188,6 +188,75 @@ public enum ReturnCode { */ abstract public Cell getNextCellHint(final Cell currentCell) throws IOException; + /** + * Provides a seek hint to bypass row-by-row scanning after {@link #filterRowKey(Cell)} rejects a + * row. When {@code filterRowKey} returns {@code true} the scan pipeline would normally iterate + * through every remaining cell in the rejected row one-by-one (via {@code nextRow()}) before + * moving on. If the filter can determine a better forward position — for example, the next range + * boundary in a {@code MultiRowRangeFilter} — it should return that target cell here, allowing + * the scanner to seek directly past the unwanted rows. + * + *
Contract: + *
Contract: + *
The skipping-row mode flag is set around the seek so that block-level size tracking + * continues to function (consistent with {@link #nextRow}), and the filter state is reset + * afterwards so the next row starts with a clean filter context. + * + * @param scannerContext scanner context used for limit tracking + * @param curRowCell the first cell of the row that was rejected by {@code filterRowKey}; + * passed to the coprocessor hook for observability + * @param hint the validated {@link ExtendedCell} returned by the filter; the + * scanner will seek to this position + * @return {@code true} if scanning should continue, {@code false} if a coprocessor requests + * an early stop (mirrors the contract of {@link #nextRow}) + * @throws IOException if the seek or the coprocessor hook signals a failure + */ + private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, ExtendedCell hint) + throws IOException { + assert this.joinedContinuationRow == null : "Trying to go to next row during joinedHeap read."; + + // Enable skipping-row mode so block-size accounting is consistent with nextRow(). + scannerContext.setSkippingRow(true); + this.storeHeap.requestSeek(hint, true, true); + scannerContext.setSkippingRow(false); + + resetFilters(); + + // Notify coprocessors, identical to the epilogue in nextRow(). + return this.region.getCoprocessorHost() == null + || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); + } + + /** + * Asks the current {@link org.apache.hadoop.hbase.filter.FilterWrapper} for a seek hint to use + * after a row has been rejected by {@link #filterRowKey}. If the wrapped filter overrides + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}, this returns its + * answer as an {@link ExtendedCell}; otherwise returns {@code null}. + * + *
The returned cell is validated to be an {@link ExtendedCell} because filters run on the + * server side and the scanner infrastructure requires {@code ExtendedCell} references. + * + * @param rowCell the first cell of the rejected row (same cell passed to {@code filterRowKey}) + * @return a validated {@link ExtendedCell} seek target, or {@code null} if the filter provides + * no hint + * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance + * @throws IOException if the filter signals an I/O failure + */ + private ExtendedCell getHintForRejectedRow(Cell rowCell) throws IOException { + if (filter == null) { + return null; + } + Cell hint = filter.getHintForRejectedRow(rowCell); + if (hint == null) { + return null; + } + if (!(hint instanceof ExtendedCell)) { + throw new DoNotRetryIOException( + "Incorrect filter implementation: the Cell returned by getHintForRejectedRow " + + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); + } + return (ExtendedCell) hint; + } + protected boolean shouldStop(Cell currentRowCell) { if (currentRowCell == null) { return true; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index c07b91b77e68..f3a99ca0892d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -61,6 +61,16 @@ public abstract class UserScanQueryMatcher extends ScanQueryMatcher { private ExtendedCell curColCell = null; + /** + * Holds a seek-hint produced by {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} + * at one of the structural short-circuit points in {@link #matchColumn}. When non-null this is + * returned by {@link #getNextKeyHint} instead of delegating to + * {@link org.apache.hadoop.hbase.filter.Filter#getNextCellHint}, because the hint was computed + * for a cell that never reached {@code filterCell}. Cleared on every {@link #getNextKeyHint} + * call so it cannot leak across multiple seek-hint cycles. + */ + private ExtendedCell pendingSkipHint = null; + private static ExtendedCell createStartKey(Scan scan, ScanInfo scanInfo) { if (scan.includeStartRow()) { return createStartKeyFromRow(scan.getStartRow(), scanInfo); @@ -107,18 +117,26 @@ public Filter getFilter() { @Override public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException { + // A structural short-circuit in matchColumn (time-range, column, or version gate) may have + // stored a hint via resolveSkipHint() before returning SEEK_NEXT_USING_HINT. Drain and return + // it first; it takes priority because it was produced for the exact cell that triggered the + // seek code, without ever calling filterCell. + if (pendingSkipHint != null) { + ExtendedCell hint = pendingSkipHint; + pendingSkipHint = null; + return hint; + } + // Normal path: filterCell returned SEEK_NEXT_USING_HINT — delegate to the filter. if (filter == null) { return null; + } + Cell hint = filter.getNextCellHint(cell); + if (hint == null || hint instanceof ExtendedCell) { + return (ExtendedCell) hint; } else { - Cell hint = filter.getNextCellHint(cell); - if (hint == null || hint instanceof ExtendedCell) { - return (ExtendedCell) hint; - } else { - throw new DoNotRetryIOException("Incorrect filter implementation, " - + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " - + filter.getClass().getName()); - } - + throw new DoNotRetryIOException("Incorrect filter implementation, " + + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " + + filter.getClass().getName()); } } @@ -134,14 +152,40 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty throws IOException { int tsCmp = tr.compare(timestamp); if (tsCmp > 0) { + // Cell is newer than the scan's time-range upper bound. Give the filter one last chance to + // provide a seek hint before we fall back to a plain cell-level SKIP. This addresses + // HBASE-29974 Path 2: time-range gate fires before filterCell is reached. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return MatchCode.SKIP; } if (tsCmp < 0) { + // Cell is older than the scan's time-range lower bound. Give the filter a chance to provide + // a seek hint before we defer to the column tracker's next-row/next-column suggestion. + // Addresses HBASE-29974 Path 2: time-range gate fires before filterCell is reached. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return columns.getNextRowOrNextColumn(cell); } // STEP 1: Check if the column is part of the requested columns MatchCode matchCode = columns.checkColumn(cell, typeByte); if (matchCode != MatchCode.INCLUDE) { + // Column is excluded by the scan's column set. Give the filter a chance to provide a + // seek hint before the column-tracker's suggestion is used. Addresses HBASE-29974 Path 3. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return matchCode; } /* @@ -151,8 +195,24 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty matchCode = columns.checkVersions(cell, timestamp, typeByte, false); switch (matchCode) { case SKIP: + // Version limit reached; skip this cell. Give the filter a hint opportunity before + // falling back to SKIP. Addresses HBASE-29974 Path 3: version gate fires before filterCell. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return MatchCode.SKIP; case SEEK_NEXT_COL: + // Version limit reached; advance to the next column. Give the filter a hint opportunity + // before falling back to SEEK_NEXT_COL. Addresses HBASE-29974 Path 3. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return MatchCode.SEEK_NEXT_COL; default: // It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or INCLUDE_AND_SEEK_NEXT_ROW. @@ -166,6 +226,35 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty : mergeFilterResponse(cell, matchCode, filter.filterCell(cell)); } + /** + * Calls {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} on the current filter, + * validates the returned cell type, and stores it as {@link #pendingSkipHint} so that + * {@link #getNextKeyHint} can return it when the scan pipeline asks for the seek target after + * receiving {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. + * + *
This is only called from the structural short-circuit branches of {@link #matchColumn}, + * where {@code filterCell} has not been called, in accordance with the stateless + * contract of {@code Filter#getSkipHint}. + * + * @param cell the cell that triggered the structural short-circuit + * @return the validated {@link ExtendedCell} hint, or {@code null} if the filter returns no hint + * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance + * @throws IOException if the filter signals an I/O failure + */ + private ExtendedCell resolveSkipHint(ExtendedCell cell) throws IOException { + Cell raw = filter.getSkipHint(cell); + if (raw == null) { + return null; + } + if (!(raw instanceof ExtendedCell)) { + throw new DoNotRetryIOException( + "Incorrect filter implementation: the Cell returned by getSkipHint " + + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); + } + pendingSkipHint = (ExtendedCell) raw; + return pendingSkipHint; + } + /** * Call this when scan has filter. Decide the desired behavior by checkVersions's MatchCode and * filterCell's ReturnCode. Cell may be skipped by filter, so the column versions in result may be diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java new file mode 100644 index 000000000000..7207fa245d83 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionScanner; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +/** + * Integration tests for HBASE-29974 Path 1: + * {@link Filter#getHintForRejectedRow(Cell)} allows the scan pipeline to seek directly past + * rejected rows instead of iterating through every cell in each rejected row one-by-one via + * {@code nextRow()}. + * + *
Each test verifies two properties simultaneously: + *
The filter rejects the first 5 rows ({@code "row-00"} through {@code "row-04"}) via + * {@link Filter#filterRowKey(Cell)} and, for every rejected row, returns a hint pointing + * directly to {@code "row-05"} via {@link Filter#getHintForRejectedRow(Cell)}. + * + *
Expected behaviour: + *
When {@link Filter#getHintForRejectedRow(Cell)} returns {@code null} (the default from
+ * {@link FilterBase}), the scanner must fall back to the existing cell-by-cell {@code nextRow()}
+ * behaviour and still return the correct results. This test acts as a regression guard to
+ * confirm that the null-hint path does not alter observable scan results.
+ */
+ @Test
+ public void testNullHintFallsThroughToLegacyNextRowBehaviour() throws IOException {
+ final String prefix = "hint-null";
+ final int rejectedCount = 3;
+ final int acceptedCount = 3;
+ writeRows(prefix, rejectedCount + acceptedCount);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount));
+
+ // This filter rejects rows but does NOT override getHintForRejectedRow (returns null).
+ FilterBase noHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+ };
+
+ List If the filter's {@link Filter#getHintForRejectedRow(Cell)} returns a position that is
+ * past the end of the table (beyond all written rows), the scan must complete without
+ * returning any results and without throwing an exception.
+ */
+ @Test
+ public void testHintBeyondLastRowTerminatesScanGracefully() throws IOException {
+ final String prefix = "hint-beyond";
+ writeRows(prefix, 5);
+
+ // The hint points past "zzz", which is lexicographically after all "hint-beyond-XX" rows.
+ final byte[] beyondAllRows = Bytes.toBytes("zzz-beyond-table-end");
+
+ FilterBase beyondHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return true; // reject every row
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ return PrivateCellUtil.createFirstOnRow(beyondAllRows);
+ }
+ };
+
+ List When the filter provides a hint that advances only one row at a time (i.e. it always
+ * points to the immediate next row key), {@code getHintForRejectedRow} is called once per
+ * rejected row. This verifies that the hint mechanism works correctly in the incremental-step
+ * case, not just the bulk-jump case.
+ */
+ @Test
+ public void testPerRowHintCalledOncePerRejectedRow() throws IOException {
+ final String prefix = "hint-perrow";
+ final int rejectedCount = 4;
+ final int acceptedCount = 2;
+ writeRows(prefix, rejectedCount + acceptedCount);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase perRowHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ /**
+ * Returns a hint pointing to the cell immediately after the current one (one row at a
+ * time). This causes the scanner to seek per-row, so this method is called once for
+ * each rejected row.
+ */
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ // Create a key that is one step past the current row key.
+ return PrivateCellUtil.createFirstOnNextRow(firstRowCell);
+ }
+ };
+
+ List When a cell's timestamp is newer than the scan's time-range maximum ({@code ts >= maxTs}),
+ * the original code returns {@link MatchCode#SKIP} without ever calling {@code filterCell}.
+ * With the fix, the matcher must first ask the filter for a skip hint. If the filter returns a
+ * non-null hint, {@link MatchCode#SEEK_NEXT_USING_HINT} must be returned and
+ * {@link UserScanQueryMatcher#getNextKeyHint} must return that hint.
+ */
+ @Test
+ public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 2000;
+ long maxTs = now - 1000;
+
+ // The hint points to a cell in the middle of the valid time range so the scanner can resume.
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data);
+ Scan timeRangeScan =
+ new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new FixedSkipHintFilter(hintCell));
+
+ long now2 = EnvironmentEdgeManager.currentTime();
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ null, 0, now2, null);
+
+ // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell (Path 2).
+ KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data);
+ qm.setToNewRow(tooNew);
+
+ assertEquals("Filter hint must promote SKIP to SEEK_NEXT_USING_HINT",
+ MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew));
+ assertEquals("getNextKeyHint must return the pending skip hint", hintCell,
+ qm.getNextKeyHint(tooNew));
+ // pendingSkipHint is consumed on the first getNextKeyHint call; subsequent calls fall through
+ // to FilterBase#getNextCellHint which returns null.
+ assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(tooNew));
+ }
+
+ /**
+ * HBASE-29974 Path 2 — time-range lower-bound gate (tsCmp < 0).
+ *
+ * When a cell's timestamp is older than the scan's time-range minimum ({@code ts < minTs}),
+ * the original code returns the column-tracker's next-row-or-next-column suggestion without
+ * consulting the filter. With the fix, the matcher must first ask the filter for a skip hint,
+ * and prefer it over the column-tracker's answer when non-null.
+ */
+ @Test
+ public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 500;
+ long maxTs = now;
+
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 100, data);
+ Scan timeRangeScan =
+ new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ null, 0, now, null);
+
+ // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires (Path 2).
+ KeyValue tooOld = new KeyValue(row1, fam2, col1, now - 1000, data);
+ qm.setToNewRow(tooOld);
+
+ assertEquals("Filter hint must promote column-tracker result to SEEK_NEXT_USING_HINT",
+ MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld));
+ assertEquals("getNextKeyHint must return the pending skip hint", hintCell,
+ qm.getNextKeyHint(tooOld));
+ assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(tooOld));
+ }
+
+ /**
+ * HBASE-29974 Path 2 — time-range gate, null hint falls through.
+ *
+ * When the filter's {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} returns
+ * {@code null}, the matcher must fall back to the original structural skip code (SKIP or the
+ * column-tracker result) with no regression.
+ */
+ @Test
+ public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 2000;
+ long maxTs = now - 1000;
+
+ // AlwaysIncludeFilter does not override getSkipHint(), so it returns null.
+ Scan timeRangeScan =
+ new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new AlwaysIncludeFilter());
+
+ long now2 = EnvironmentEdgeManager.currentTime();
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ null, 0, now2, null);
+
+ KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data);
+ qm.setToNewRow(tooNew);
+
+ // With a null hint the gate must still return the original SKIP code.
+ assertEquals("Null getSkipHint must not change the original SKIP match code",
+ MatchCode.SKIP, qm.match(tooNew));
+ }
+
+ /**
+ * HBASE-29974 Path 3 — column-exclusion gate ({@code checkColumn() != INCLUDE}).
+ *
+ * When an explicit-column scan encounters a qualifier not in its requested set, the column
+ * tracker returns a skip/seek code before {@code filterCell} is ever reached. With the fix,
+ * the matcher must consult {@code getSkipHint} first, and return
+ * {@link MatchCode#SEEK_NEXT_USING_HINT} when a non-null hint is available.
+ */
+ @Test
+ public void testSkipHintConsultedForExcludedColumn() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ // get.getFamilyMap().get(fam2) contains col2, col4, col5; presenting col1 triggers exclusion.
+ KeyValue hintCell = new KeyValue(row1, fam2, col2, 1, data);
+ Scan scanWithFilter = new Scan(scan).setFilter(new FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ get.getFamilyMap().get(fam2), now - ttl, now, null);
+
+ // col1 is not in {col2, col4, col5}: checkColumn returns a non-INCLUDE code.
+ KeyValue excludedCol = new KeyValue(row1, fam2, col1, 1, data);
+ qm.setToNewRow(excludedCol);
+
+ assertEquals("Filter hint must promote column-exclusion result to SEEK_NEXT_USING_HINT",
+ MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol));
+ assertEquals("getNextKeyHint must return the pending skip hint", hintCell,
+ qm.getNextKeyHint(excludedCol));
+ assertNull("pendingSkipHint must be cleared after being drained",
+ qm.getNextKeyHint(excludedCol));
+ }
+
+ /**
+ * HBASE-29974 Path 3 — version-exhaustion gate ({@code checkVersions()} returns
+ * {@link MatchCode#SEEK_NEXT_COL}).
+ *
+ * When the column tracker has already seen the maximum number of versions for a qualifier,
+ * it returns SKIP or SEEK_NEXT_COL for subsequent versions, again bypassing {@code filterCell}.
+ * With the fix, the matcher must consult {@code getSkipHint} at that point.
+ */
+ @Test
+ public void testSkipHintConsultedOnVersionExhaustion() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data);
+ // ScanInfo maxVersions=1: column tracker allows exactly 1 version per qualifier.
+ Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ null, now - ttl, now, null);
+
+ // First version of col1: passes all gates and reaches filterCell.
+ KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data);
+ qm.setToNewRow(version1);
+ MatchCode firstCode = qm.match(version1);
+ assertEquals("First version must be included", MatchCode.INCLUDE, firstCode);
+
+ // Second version of col1: checkVersions returns SEEK_NEXT_COL (version limit exceeded).
+ // The filter hint must be consulted and SEEK_NEXT_USING_HINT returned.
+ KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data);
+ assertEquals("Filter hint must promote version-exhaustion result to SEEK_NEXT_USING_HINT",
+ MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2));
+ assertEquals("getNextKeyHint must return the pending skip hint from version gate", hintCell,
+ qm.getNextKeyHint(version2));
+ assertNull("pendingSkipHint must be cleared after being drained",
+ qm.getNextKeyHint(version2));
+ }
+
+ /**
+ * HBASE-29974 — {@code pendingSkipHint} must not interfere with the normal
+ * {@code filterCell → SEEK_NEXT_USING_HINT → getNextCellHint} path.
+ *
+ * If a cell passes all structural gates and reaches {@code filterCell}, which then returns
+ * {@link ReturnCode#SEEK_NEXT_USING_HINT}, the existing {@link Filter#getNextCellHint(Cell)}
+ * mechanism must still be used (not {@code getSkipHint}), and {@code pendingSkipHint} must
+ * remain null so it does not contaminate the result.
+ */
+ @Test
+ public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ final KeyValue filterHintCell = new KeyValue(row2, fam2, col1, now, data);
+
+ // A filter that returns SEEK_NEXT_USING_HINT from filterCell and provides a hint through
+ // getNextCellHint, while NOT overriding getSkipHint (returns null by default).
+ FilterBase seekFilter = new FilterBase() {
+ @Override
+ public ReturnCode filterCell(Cell c) {
+ return ReturnCode.SEEK_NEXT_USING_HINT;
+ }
+
+ @Override
+ public Cell getNextCellHint(Cell currentCell) {
+ return filterHintCell;
+ }
+ };
+
+ Scan scanWithFilter =
+ new Scan().addFamily(fam2).setFilter(seekFilter);
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ null, now - ttl, now, null);
+
+ KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data);
+ qm.setToNewRow(cell);
+
+ assertEquals("filterCell path must still return SEEK_NEXT_USING_HINT",
+ MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell));
+ // pendingSkipHint was never set (getSkipHint returned null), so getNextKeyHint must
+ // delegate to filter.getNextCellHint().
+ assertEquals("getNextKeyHint must delegate to getNextCellHint when no skip hint is pending",
+ filterHintCell, qm.getNextKeyHint(cell));
+ }
}