diff --git a/collector_db/StatementComposer.py b/collector_db/StatementComposer.py index b2b7e706..a84df5a1 100644 --- a/collector_db/StatementComposer.py +++ b/collector_db/StatementComposer.py @@ -3,10 +3,11 @@ from sqlalchemy import Select, select, exists, Table, func, Subquery, and_ from sqlalchemy.orm import aliased -from collector_db.enums import URLMetadataAttributeType, ValidationStatus +from collector_db.enums import URLMetadataAttributeType, ValidationStatus, TaskType from collector_db.models import URL, URLHTMLContent, AutomatedUrlAgencySuggestion, URLOptionalDataSourceMetadata, Batch, \ - ConfirmedURLAgency + ConfirmedURLAgency, LinkTaskURL, Task from collector_manager.enums import URLStatus, CollectorType +from core.enums import BatchStatus class StatementComposer: @@ -16,11 +17,19 @@ class StatementComposer: @staticmethod def pending_urls_without_html_data() -> Select: - return (select(URL). - outerjoin(URLHTMLContent). - where(URLHTMLContent.id == None). - where(URL.outcome == URLStatus.PENDING.value)) + subquery = (select(1). + select_from(LinkTaskURL). + join(Task, LinkTaskURL.task_id == Task.id). + where(LinkTaskURL.url_id == URL.id). + where(Task.task_type == TaskType.HTML.value). + where(Task.task_status == BatchStatus.COMPLETE.value) + ) + query = select(URL).where( + ~exists(subquery) + ) + + return query @staticmethod diff --git a/security_manager/SecurityManager.py b/security_manager/SecurityManager.py index 8d80f46c..18bc6a26 100644 --- a/security_manager/SecurityManager.py +++ b/security_manager/SecurityManager.py @@ -39,7 +39,6 @@ def __init__( def validate_token(self, token: str) -> AccessInfo: try: payload = jwt.decode(token, self.secret_key, algorithms=[ALGORITHM]) - print(payload) return self.payload_to_access_info(payload) except InvalidTokenError as e: raise HTTPException( diff --git a/tests/test_automated/integration/api/test_example_collector.py b/tests/test_automated/integration/api/test_example_collector.py index a235d8e8..d1466c8c 100644 --- a/tests/test_automated/integration/api/test_example_collector.py +++ b/tests/test_automated/integration/api/test_example_collector.py @@ -24,7 +24,7 @@ async def test_example_collector(api_test_helper): # Temporarily disable task trigger disable_task_trigger(ath) - logger = AsyncCoreLogger(adb_client=AsyncDatabaseClient()) + logger = AsyncCoreLogger(adb_client=AsyncDatabaseClient(), flush_interval=1) await logger.__aenter__() ath.async_core.collector_manager.logger = logger @@ -93,7 +93,7 @@ async def test_example_collector_error(api_test_helper, monkeypatch): """ ath = api_test_helper - logger = AsyncCoreLogger(adb_client=AsyncDatabaseClient()) + logger = AsyncCoreLogger(adb_client=AsyncDatabaseClient(), flush_interval=1) await logger.__aenter__() ath.async_core.collector_manager.logger = logger