diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py index 957a4eb6..9e1ab473 100644 --- a/collector_db/AsyncDatabaseClient.py +++ b/collector_db/AsyncDatabaseClient.py @@ -1573,20 +1573,26 @@ async def get_recent_batch_status_info( limit = 100 query = Select(Batch) if has_pending_urls is not None: + pending_url_subquery = Select(URL).where( + and_( + URL.batch_id == Batch.id, + URL.outcome == URLStatus.PENDING.value + ) + ) + if has_pending_urls: # Query for all that have pending URLs - query = query.join(URL, Batch.id == URL.batch_id).filter(URL.outcome == URLStatus.PENDING.value) + query = query.where(exists( + pending_url_subquery + )) else: # Query for all that DO NOT have pending URLs # (or that have no URLs at all) - query = query.join( - URL, - Batch.id == URL.batch_id, - isouter=True - ).filter( - or_( - URL.outcome != URLStatus.PENDING.value, - URL.outcome.is_(None) + query = query.where( + not_( + exists( + pending_url_subquery + ) ) ) if collector_type: diff --git a/tests/helpers/DBDataCreator.py b/tests/helpers/DBDataCreator.py index 28d8a573..695a3c7a 100644 --- a/tests/helpers/DBDataCreator.py +++ b/tests/helpers/DBDataCreator.py @@ -2,7 +2,7 @@ from random import randint from typing import List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, model_validator from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.BatchInfo import BatchInfo diff --git a/tests/test_automated/integration/api/test_batch.py b/tests/test_automated/integration/api/test_batch.py index bc86dfec..961b1a30 100644 --- a/tests/test_automated/integration/api/test_batch.py +++ b/tests/test_automated/integration/api/test_batch.py @@ -16,14 +16,14 @@ async def test_get_batch_status_pending_url_filter(api_test_helper): # Add an errored out batch batch_error = await ath.db_data_creator.batch_and_urls( strategy=CollectorType.EXAMPLE, - url_count=1, + url_count=2, batch_status=BatchStatus.ERROR ) # Add a batch with pending urls batch_pending = await ath.db_data_creator.batch_and_urls( strategy=CollectorType.EXAMPLE, - url_count=1, + url_count=2, batch_status=BatchStatus.READY_TO_LABEL, with_html_content=True, url_status=URLStatus.PENDING @@ -32,7 +32,7 @@ async def test_get_batch_status_pending_url_filter(api_test_helper): # Add a batch with submitted URLs batch_submitted = await ath.db_data_creator.batch_and_urls( strategy=CollectorType.EXAMPLE, - url_count=1, + url_count=2, batch_status=BatchStatus.READY_TO_LABEL, with_html_content=True, url_status=URLStatus.SUBMITTED @@ -41,14 +41,14 @@ async def test_get_batch_status_pending_url_filter(api_test_helper): # Add an aborted batch batch_aborted = await ath.db_data_creator.batch_and_urls( strategy=CollectorType.EXAMPLE, - url_count=1, + url_count=2, batch_status=BatchStatus.ABORTED ) # Add a batch with validated URLs batch_validated = await ath.db_data_creator.batch_and_urls( strategy=CollectorType.EXAMPLE, - url_count=1, + url_count=2, batch_status=BatchStatus.READY_TO_LABEL, with_html_content=True, url_status=URLStatus.VALIDATED