-
Notifications
You must be signed in to change notification settings - Fork 67
fix: avoid using uninitialized memory #172
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,154 @@ | ||
| -- Test RUM index scan correctness after concurrent VACUUM removes all | ||
| -- posting tree entry items. | ||
| SET enable_seqscan TO off; | ||
| SET enable_indexscan TO off; | ||
| SET enable_bitmapscan TO on; | ||
| CREATE TABLE test_rum_vacuum (id int, body tsvector); | ||
| ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i; | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i; | ||
| CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); | ||
| DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5; | ||
| DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; | ||
| -- test normal result | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| id | body | ||
| ----+------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| (1 row) | ||
|
|
||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| id | body | ||
| -------+-------------------- | ||
| 10000 | 'great':1 'john':2 | ||
| (1 row) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| 10000 | 'great':1 'john':2 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (5 rows) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 10000 | 'great':1 'john':2 | ||
| 6 | 'ann':2 'great':1 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (5 rows) | ||
|
|
||
| VACUUM test_rum_vacuum; | ||
| -- this shouldn't cause a core dump | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| id | body | ||
| ----+------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| (1 row) | ||
|
|
||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| id | body | ||
| -------+-------------------- | ||
| 10000 | 'great':1 'john':2 | ||
| (1 row) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| 10000 | 'great':1 'john':2 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (5 rows) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 10000 | 'great':1 'john':2 | ||
| 6 | 'ann':2 'great':1 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (5 rows) | ||
|
|
||
| -- test that data can still be found after reinsertion | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i; | ||
| SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| count | ||
| ------- | ||
| 9998 | ||
| (1 row) | ||
|
|
||
| DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999; | ||
| VACUUM test_rum_vacuum; | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| id | body | ||
| ----+------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| (1 row) | ||
|
|
||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| id | body | ||
| -------+-------------------- | ||
| 20000 | 'great':1 'john':2 | ||
| (1 row) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| 20000 | 'great':1 'john':2 | ||
| (5 rows) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 20000 | 'great':1 'john':2 | ||
| 6 | 'ann':2 'great':1 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (5 rows) | ||
|
|
||
| -- test if do while loop works when an entry has no non-empty posting tree pages | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; | ||
| DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; | ||
| VACUUM test_rum_vacuum; | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| id | body | ||
| ----+------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| (1 row) | ||
|
|
||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| id | body | ||
| ----+------ | ||
| (0 rows) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (4 rows) | ||
|
|
||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
| id | body | ||
| -------+-------------------- | ||
| 6 | 'ann':2 'great':1 | ||
| 10001 | 'great':1 'jame':2 | ||
| 10002 | 'great':1 'jame':2 | ||
| 10003 | 'great':1 'jame':2 | ||
| (4 rows) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried to improve the test by adding additional comments and backward scan testing to it. I'm attaching the patch and waiting for your comments. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| -- Test RUM index scan correctness after concurrent VACUUM removes all | ||
| -- posting tree entry items. | ||
|
|
||
| SET enable_seqscan TO off; | ||
| SET enable_indexscan TO off; | ||
| SET enable_bitmapscan TO on; | ||
|
|
||
| CREATE TABLE test_rum_vacuum (id int, body tsvector); | ||
| ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); | ||
|
|
||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i; | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i; | ||
|
|
||
| CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); | ||
|
|
||
| DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5; | ||
| DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; | ||
|
|
||
| -- test normal result | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
|
|
||
| VACUUM test_rum_vacuum; | ||
|
|
||
| -- this shouldn't cause a core dump | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
|
|
||
| -- test that data can still be found after reinsertion | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i; | ||
| SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999; | ||
|
|
||
| VACUUM test_rum_vacuum; | ||
|
|
||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; | ||
|
|
||
| -- test if do while loop works when an entry has no non-empty posting tree pages | ||
| INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; | ||
| DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; | ||
| VACUUM test_rum_vacuum; | ||
|
|
||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); | ||
| SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; | ||
| SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -684,9 +684,23 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) | |
| } | ||
|
|
||
| LockBuffer(entry->buffer, RUM_UNLOCK); | ||
| entry->isFinished = setListPositionScanEntry(rumstate, entry); | ||
| if (!entry->isFinished) | ||
| entry->curItem = entry->list[entry->offset]; | ||
|
|
||
| /* | ||
| * If the current page is empty (nlist == 0), we cannot assume the | ||
| * scan is complete, as subsequent pages may exist. Therefore, we | ||
| * set isFinished = false and leave entry->nlist = 0 and | ||
| * entry->offset = 0 to ensure that entryGetItem advances to the | ||
| * next page on the next call. Otherwise, initialize curItem to | ||
| * the first valid item. | ||
| */ | ||
| if (entry->nlist == 0) | ||
| entry->isFinished = false; | ||
| else | ||
| { | ||
| entry->isFinished = setListPositionScanEntry(rumstate, entry); | ||
| if (!entry->isFinished) | ||
| entry->curItem = entry->list[entry->offset]; | ||
| } | ||
| } | ||
| else if (RumGetNPosting(itup) > 0) | ||
| { | ||
|
|
@@ -699,6 +713,16 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) | |
| if (!entry->isFinished) | ||
| entry->curItem = entry->list[entry->offset]; | ||
| } | ||
| /* | ||
| * Else, the posting list for this entry has been entirely vacuumed | ||
| * away (nlist == 0 after setListPositionScanEntry). We cannot assume | ||
| * the scan is complete, as subsequent pages may exist. Therefore, we | ||
| * set isFinished = false and leave entry->nlist = 0 and entry->offset | ||
| * = 0 to ensure that entryGetItem advances to the next page on the | ||
| * next call. | ||
| */ | ||
| else | ||
| entry->isFinished = false; | ||
|
Comment on lines
+716
to
+725
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code branch handles either a regular search query or a full index scan query:
I think the comment should be slightly rewritten to reflect this: |
||
|
|
||
| if (entry->queryCategory == RUM_CAT_EMPTY_QUERY && | ||
| entry->scanWithAddInfo) | ||
|
|
@@ -1011,6 +1035,13 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) | |
|
|
||
| LockBuffer(entry->buffer, RUM_UNLOCK); | ||
|
|
||
| /* | ||
| * No valid item if VACUUM removed all items concurrently. Go on | ||
| * next page. | ||
| */ | ||
| if (entry->nlist == 0) | ||
| break; | ||
|
|
||
| if (entry->offset < 0) | ||
| { | ||
| if (ScanDirectionIsForward(entry->scanDirection) && | ||
|
|
@@ -1044,6 +1075,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) | |
| RumItemSetMin(&entry->curItem); | ||
| entry->offset = InvalidOffsetNumber; | ||
| entry->list = NULL; | ||
| entry->nlist = 0; | ||
| if (entry->gdi) | ||
| { | ||
| freeRumBtreeStack(entry->gdi->stack); | ||
|
|
@@ -1151,6 +1183,18 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) | |
|
|
||
| LockBuffer(entry->buffer, RUM_UNLOCK); | ||
| entry->isFinished = false; | ||
|
|
||
| /* | ||
| * Posting tree's first leaf page is empty due to concurrent VACUUM. | ||
| * Advance through empty pages until we find one with items or exhaust | ||
| * the tree. entryGetItem does not re-invoke entryGetNextItem after we | ||
| * return, so we must do it here to ensure curItem is valid on return. | ||
| */ | ||
| if (entry->nlist == 0) | ||
| { | ||
| entryGetNextItem(rumstate, entry, snapshot); | ||
| goto entry_done; | ||
| } | ||
| } | ||
| else if (RumGetNPosting(itup) > 0) | ||
| { | ||
|
|
@@ -1161,12 +1205,21 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) | |
| rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); | ||
| entry->isFinished = setListPositionScanEntry(rumstate, entry); | ||
| } | ||
| /* Posting list has been vacuumed. Go to the next entry. */ | ||
| else | ||
| { | ||
| ItemPointerSetInvalid(&entry->curItem.iptr); | ||
| entry->isFinished = true; | ||
| goto entry_done; | ||
| } | ||
|
|
||
| Assert(entry->nlist > 0 && entry->list); | ||
|
|
||
| entry->curItem = entry->list[entry->offset]; | ||
| entry->offset += entry->scanDirection; | ||
|
|
||
| entry_done: | ||
|
|
||
| SCAN_ENTRY_GET_KEY(entry, rumstate, itup); | ||
|
|
||
| /* | ||
|
|
@@ -1340,8 +1393,24 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps | |
| } | ||
| else if (entry->stack) | ||
| { | ||
| entry->offset++; | ||
| if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) | ||
| /* | ||
| * We are responsible for ensuring that we keep advancing through | ||
| * ItemLists until we find one that contains at least one valid | ||
| * item. This is necessary because concurrent VACUUM may have | ||
| * removed all items from a page, leaving an empty ItemList. In | ||
| * such cases, we must continue to the next ItemList. | ||
| */ | ||
| bool success; | ||
|
|
||
| Assert(!entry->isFinished); | ||
|
|
||
| do | ||
| { | ||
| entry->isFinished = false; | ||
| success = entryGetNextItemList(rumstate, entry, snapshot); | ||
| } while (success && entry->nlist == 0); | ||
|
|
||
| if (success && nextEntryList) | ||
| *nextEntryList = true; | ||
| } | ||
| else | ||
|
|
@@ -1361,8 +1430,22 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps | |
| dropItem(entry)); | ||
| if (entry->stack && entry->isFinished) | ||
| { | ||
| entry->isFinished = false; | ||
| if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) | ||
| /* | ||
| * We are responsible for ensuring that we keep advancing through | ||
| * ItemLists until we find one that contains at least one valid | ||
| * item. This is necessary because concurrent VACUUM may have | ||
| * removed all items from a page, leaving an empty ItemList. In | ||
| * such cases, we must continue to the next ItemList. | ||
| */ | ||
| bool success; | ||
|
|
||
| do | ||
| { | ||
| entry->isFinished = false; | ||
| success = entryGetNextItemList(rumstate, entry, snapshot); | ||
| } while (success && entry->nlist == 0); | ||
|
|
||
| if (success && nextEntryList) | ||
| *nextEntryList = true; | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is necessary to add an empty line after this line, otherwise the test fails.