Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions pulpcore/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,15 +905,7 @@ def get_content(self, content_qs=None):
if content_qs is None:
content_qs = Content.objects

content_ids = self._get_content_ids()
if isinstance(content_ids, list) and len(content_ids) >= 65535:
# Workaround for PostgreSQL's limit on the number of parameters in a query
content_ids = (
RepositoryVersion.objects.filter(pk=self.pk)
.annotate(cids=Func(F("content_ids"), function="unnest"))
.values_list("cids", flat=True)
)
return content_qs.filter(pk__in=content_ids)
return content_qs.filter(pk__in=self._get_content_ids())

@property
def content(self):
Expand Down
73 changes: 73 additions & 0 deletions pulpcore/tests/unit/models/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,3 +788,76 @@ def test_batch_operations_preserve_correctness(repository, db):
assert rvcd_qs.get(count_type=RepositoryVersionContentDetails.PRESENT).count == 40
assert rvcd_qs.filter(count_type=RepositoryVersionContentDetails.ADDED).first() is None
assert rvcd_qs.get(count_type=RepositoryVersionContentDetails.REMOVED).count == 60


def test_postgresql_parameter_limit(db, repository):
    """
    Test repository operations with >65535 content units to verify PostgreSQL parameter limit
    workaround.

    PostgreSQL limits queries to 65535 parameters. When content_ids >= 65535, the
    RepositoryVersion.get_content() method is expected to use an unnest() workaround to avoid
    this limit. This test checks that added(), removed(), and content all handle >65535 items
    correctly.

    This test is meant to cover the fix from commit b6aaa23 which added the PostgreSQL unnest()
    workaround.
    """
    # NOTE(review): carried over from the author's review comment on this test. It was split
    # off from the other tests because in its current state it:
    #   A) is flaky and seems to get stuck sometimes
    #   B) doesn't seem to fail when the workaround is reverted, which means it's not a
    #      complete test
    # Treat a pass here as weak evidence only until (B) is resolved -- TODO confirm that the
    # database driver in use actually sends one bind parameter per pk.

    pg_param_limit = 65535
    # Deliberately above the parameter limit so every query below crosses the threshold.
    content_count = 66000

    large_content_set = [Content(pulp_type="core.content") for _ in range(content_count)]
    Content.objects.bulk_create(large_content_set, batch_size=1000)
    large_pks = sorted(c.pk for c in large_content_set)
    # Set copy for constant-time membership checks; the sorted list is what gets passed to the
    # ORM filters below, exactly as before.
    large_pk_set = set(large_pks)

    version0 = repository.latest_version()

    # Test 1: Add >65535 content units - tests added() and content with >65535 items
    with repository.new_version() as version1:
        version1.add_content(Content.objects.filter(pk__in=large_pks))

    # Verify content_ids is a plain list that reaches the parameter-limit threshold
    assert isinstance(version1.content_ids, list)
    assert len(version1.content_ids) >= pg_param_limit

    # Test the content property with >65535 items
    assert version1.content.count() == content_count

    # Test the added() method with >65535 items
    current_pks = set(version1.content.values_list("pk", flat=True))
    added_pks = set(version1.added(base_version=version0).values_list("pk", flat=True))
    removed_pks = set(version1.removed(base_version=version0).values_list("pk", flat=True))

    assert len(current_pks) == content_count
    assert len(added_pks) == content_count  # Critical: added() must handle >65535 items
    assert len(removed_pks) == 0

    # Verify RepositoryVersionContentDetails
    rvcd_qs = RepositoryVersionContentDetails.objects.filter(
        repository_version=version1, content_type="core.content"
    )
    assert rvcd_qs.get(count_type=RepositoryVersionContentDetails.PRESENT).count == content_count
    assert rvcd_qs.get(count_type=RepositoryVersionContentDetails.ADDED).count == content_count
    assert rvcd_qs.filter(count_type=RepositoryVersionContentDetails.REMOVED).first() is None

    # Test 2: Remove >65535 content units - tests removed() with >65535 items
    with repository.new_version() as version2:
        version2.remove_content(Content.objects.filter(pk__in=large_pks))

    # Test the removed() method with >65535 items
    current_pks = set(version2.content.values_list("pk", flat=True))
    added_pks = set(version2.added(base_version=version1).values_list("pk", flat=True))
    removed_pks = set(version2.removed(base_version=version1).values_list("pk", flat=True))

    assert len(current_pks) == 0
    assert len(added_pks) == 0
    assert len(removed_pks) == content_count  # Critical: removed() must handle >65535 items

    # Verify RepositoryVersionContentDetails
    rvcd_qs = RepositoryVersionContentDetails.objects.filter(
        repository_version=version2, content_type="core.content"
    )
    assert rvcd_qs.filter(count_type=RepositoryVersionContentDetails.PRESENT).first() is None
    assert rvcd_qs.filter(count_type=RepositoryVersionContentDetails.ADDED).first() is None
    assert rvcd_qs.get(count_type=RepositoryVersionContentDetails.REMOVED).count == content_count

    # Verify we can iterate and fetch content without errors
    first_100 = list(version1.content[:100].values_list("pk", flat=True))
    assert len(first_100) == 100
    assert all(pk in large_pk_set for pk in first_100)