From acc19d109e1d3bcd472968dbdd9ebb22fe4950f1 Mon Sep 17 00:00:00 2001
From: Faizan Qazi <faizan@cockroachlabs.com>
Date: Wed, 18 Mar 2026 19:48:09 +0000
Subject: [PATCH] sql: fix partial index data loss / phantom rows during update

This commit fixes a bug on tables with multiple column families where a
concurrent update that does not overlap with a partial index's column
family could cause the partial index to write a NULL instead of the
actual data, or incorrectly add phantom rows to a temporary index
during a schema change backfill.

This bug was previously masked on default (single-column-family) tables
because an update to any column causes the optimizer to conservatively
fetch all columns in that family. However, with multiple column families,
two normalization rules in the optimizer caused issues:

1. PruneMutationFetchCols: If an update does not change any column
   associated with an index, the optimizer avoids fetching those
   columns. This causes the execution layer to see NULLs for the
   unfetched columns.
2. SimplifyPartialIndexProjections: If an update does not change
   any column associated with a partial index, the optimizer
   simplifies the partial index predicate evaluation to FALSE.
   This causes the execution layer to skip writes to the index.

During a schema change backfill, the execution layer's updater must
unconditionally write complete index entries to temporary (mutating)
indexes for any concurrent update, even if the index's columns are
unchanged. This ensures the backfill merger has a complete snapshot
to correctly reconcile the final index. If columns are pruned (Rule 1)
or writes are simplified away (Rule 2), the temporary index receives
incomplete entries (NULLs) or misses the update entirely.

Furthermore, missing columns can lead to phantom rows if the partial
index predicate evaluates to TRUE when given a NULL value (e.g.,
WHERE val IS NULL).

This change ensures the optimizer always fetches the required
columns and avoids simplifying predicate evaluation if the index
is a mutation index, correctly propagating the full row state to the
execution layer.

Fixes: #166122

Release note (bug fix): Fixed a bug where concurrent updates to a table
using multiple column families during a partial index creation could
result in data loss, incorrect NULL values, or validation failures in
the resulting index.
---
 pkg/sql/mvcc_backfiller_test.go      | 81 ++++++++++++++++++++++++++++
 pkg/sql/opt/cat/index.go             | 10 +++-
 pkg/sql/opt/norm/mutation_funcs.go   |  7 ++-
 pkg/sql/opt/norm/prune_cols_funcs.go |  6 ++-
 4 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/pkg/sql/mvcc_backfiller_test.go b/pkg/sql/mvcc_backfiller_test.go
index 8b55278ac1c1..02bea2b1f240 100644
--- a/pkg/sql/mvcc_backfiller_test.go
+++ b/pkg/sql/mvcc_backfiller_test.go
@@ -40,6 +40,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
 	"github.com/cockroachdb/cockroach/pkg/upgrade/upgradebase"
 	"github.com/cockroachdb/cockroach/pkg/util"
+	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
 	"github.com/cockroachdb/cockroach/pkg/util/hlc"
 	"github.com/cockroachdb/cockroach/pkg/util/json"
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
@@ -996,3 +997,83 @@ CREATE TABLE t.test (k INT PRIMARY KEY, v int);`
 	tdb.Exec(t, "CREATE INDEX ON t.test (v)")
 	require.True(t, called)
 }
+
+// TestPartialIndexBackfillWithConcurrentFamilyUpdate validates that concurrent updates
+// with multiple column families still propagate to secondary indexes being constructed.
+func TestPartialIndexBackfillWithConcurrentFamilyUpdate(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	var (
+		s     serverutils.TestServerInterface
+		sqlDB *gosql.DB
+	)
+
+	var mergeReady chan struct{}
+	var waitForMerge chan struct{}
+
+	params := base.TestServerArgs{}
+	params.Knobs = base.TestingKnobs{
+		SQLSchemaChanger: &sql.SchemaChangerTestingKnobs{
+			BackfillChunkSize: 100,
+		},
+		DistSQL: &execinfra.TestingKnobs{
+			IndexBackfillMergerTestingKnobs: &backfill.IndexBackfillMergerTestingKnobs{
+				RunBeforeScanChunk: func(startKey roachpb.Key) error {
+					mergeReady <- struct{}{}
+					<-waitForMerge
+					return nil
+				},
+			},
+		},
+		JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(),
+	}
+
+	s, sqlDB, _ = serverutils.StartServer(t, params)
+	defer s.Stopper().Stop(context.Background())
+
+	mergeReady = make(chan struct{})
+	waitForMerge = make(chan struct{})
+	runner := sqlutils.MakeSQLRunner(sqlDB)
+	runner.Exec(t, `
+CREATE DATABASE t;
+CREATE TABLE t.test (
+    id INT PRIMARY KEY,
+    val STRING,
+    other_val STRING,
+    FAMILY f1(id, val),
+    FAMILY f2(other_val)
+);
+`)
+	// Insert two rows that match predicate for the index we are creating below.
+	runner.Exec(t, `
+INSERT INTO t.test VALUES 
+(1, 'BLAH', 'MEH'), 
+(2, 'BLAH', 'BLEH'),
+(3, 'MEH', 'GLEE'),
+(4, 'BLEH', 'GLAH');`)
+
+	// Run index creation in the background.
+	grp := ctxgroup.WithContext(context.Background())
+	grp.GoCtx(func(ctx context.Context) error {
+		// Create a partial index on values that match some predicate.
+		_, err := sqlDB.Exec("CREATE INDEX idx_test on t.test(val) WHERE val LIKE '%BLAH%'")
+		return err
+	})
+	// Wait for the merge step to start.
+	<-mergeReady
+	// Updates on an unrelated column should still inject updates the new partial index,
+	// while its mutating.
+	runner.Exec(t, `UPDATE t.test SET other_val = 'B' WHERE id IN (1, 2);`)
+	// Also, validate deletions behave correctly.
+	runner.Exec(t, `DELETE FROM t.test WHERE other_val='GLAH';`)
+	// Allow the merge to continue
+	close(waitForMerge)
+	// Wait for schema change to finish
+	require.NoError(t, grp.Wait())
+	// Confirm with a scan of the partial index that everything looks sound.
+	runner.CheckQueryResults(t, `SELECT id, val FROM t.test@idx_test WHERE val LIKE '%BLAH%' ORDER BY id`, [][]string{
+		{"1", "BLAH"},
+		{"2", "BLAH"},
+	})
+}
diff --git a/pkg/sql/opt/cat/index.go b/pkg/sql/opt/cat/index.go
index ba42fb1172ad..3c9c54d02264 100644
--- a/pkg/sql/opt/cat/index.go
+++ b/pkg/sql/opt/cat/index.go
@@ -210,8 +210,8 @@ type Index interface {
 	// definition, where i < PartitionCount.
 	Partition(i int) Partition
 
-	// IsTemporaryIndexForBackfill returns true iff the index is an index being
-	// used as the temporary index being used by an in-progress index backfill.
+	// IsTemporaryIndexForBackfill returns true if the index is a temporary index
+	// for an in-progress index backfill.
 	IsTemporaryIndexForBackfill() bool
 }
 
@@ -232,6 +232,12 @@ func IsMutationIndex(table Table, ord IndexOrdinal) bool {
 	return ord >= table.IndexCount()
 }
 
+// IsTemporaryMutationIndex is a convenience function that returns true if the index at
+// the given ordinal position is a mutation index and is temporary.
+func IsTemporaryMutationIndex(table Table, ord IndexOrdinal) bool {
+	return IsMutationIndex(table, ord) && table.Index(ord).IsTemporaryIndexForBackfill()
+}
+
 // Partition is an interface to a PARTITION BY LIST partition of an index. The
 // intended use is to support planning of scans or lookup joins that will use
 // locality optimized search. Locality optimized search can be planned when the
diff --git a/pkg/sql/opt/norm/mutation_funcs.go b/pkg/sql/opt/norm/mutation_funcs.go
index dc0cf0c1ac54..1bd418946c79 100644
--- a/pkg/sql/opt/norm/mutation_funcs.go
+++ b/pkg/sql/opt/norm/mutation_funcs.go
@@ -98,6 +98,11 @@ func (c *CustomFuncs) SimplifiablePartialIndexProjectCols(
 		// required. Therefore, the partial index PUT and DEL columns cannot be
 		// simplified.
 		//
+		// If a secondary index is being built, then there will be points where
+		// all values will be forced with a Put. As a result, runtime expects mutated
+		// temporary indexes to always have values available for writes during index construction,
+		// even if the mutation does not touch those columns.
+		//
 		// Note that we use the set of index columns where the virtual
 		// columns have been mapped to their source columns. Virtual columns
 		// are never part of the updated columns. Updates to source columns
@@ -105,7 +110,7 @@ func (c *CustomFuncs) SimplifiablePartialIndexProjectCols(
 		predFilters := *pred.(*memo.FiltersExpr)
 		indexAndPredCols := tabMeta.IndexColumnsMapInverted(i)
 		indexAndPredCols.UnionWith(predFilters.OuterCols())
-		if indexAndPredCols.Intersects(updateCols) {
+		if indexAndPredCols.Intersects(updateCols) || cat.IsTemporaryMutationIndex(tabMeta.Table, i) {
 			continue
 		}
 
diff --git a/pkg/sql/opt/norm/prune_cols_funcs.go b/pkg/sql/opt/norm/prune_cols_funcs.go
index ec8f090a98f3..ec4de4160f50 100644
--- a/pkg/sql/opt/norm/prune_cols_funcs.go
+++ b/pkg/sql/opt/norm/prune_cols_funcs.go
@@ -195,7 +195,11 @@ func (c *CustomFuncs) NeededMutationFetchCols(
 				predFilters := *pred.(*memo.FiltersExpr)
 				indexAndPredCols.UnionWith(predFilters.OuterCols())
 			}
-			if !indexAndPredCols.Intersects(updateCols) {
+			// If a secondary index is being built, then there will be points where
+			// all values will be forced with a Put. As a result, runtime expects temporary
+			// mutated indexes to always have values available for writes during index
+			// construction, even if the mutation does not touch those columns.
+			if !indexAndPredCols.Intersects(updateCols) && !cat.IsTemporaryMutationIndex(tabMeta.Table, i) {
 				continue
 			}