From 6f2ab38ba72749ebf613ef91b2d4f1523116a7f8 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sat, 13 Sep 2025 09:56:32 -0400 Subject: [PATCH] Fix bug in URL Submit Approved Task, update test --- src/api/main.py | 1 + .../operators/submit_approved/queries/cte.py | 29 +++++++++++++++++++ .../operators/submit_approved/queries/get.py | 11 ++++--- .../submit_approved/queries/has_validated.py | 20 ++++--------- src/db/models/impl/batch/sqlalchemy.py | 3 +- .../test_submit_approved_url_task.py | 3 ++ tests/conftest.py | 8 +++-- 7 files changed, 52 insertions(+), 23 deletions(-) create mode 100644 src/core/tasks/url/operators/submit_approved/queries/cte.py diff --git a/src/api/main.py b/src/api/main.py index f4f7db5c..95041e19 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -41,6 +41,7 @@ from src.external.url_request.core import URLRequestInterface from environs import Env + @asynccontextmanager async def lifespan(app: FastAPI): env_var_manager = EnvVarManager.get() diff --git a/src/core/tasks/url/operators/submit_approved/queries/cte.py b/src/core/tasks/url/operators/submit_approved/queries/cte.py new file mode 100644 index 00000000..ccd55c8d --- /dev/null +++ b/src/core/tasks/url/operators/submit_approved/queries/cte.py @@ -0,0 +1,29 @@ +from sqlalchemy import CTE, select, exists +from sqlalchemy.orm import aliased + +from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource + +VALIDATED_URLS_WITHOUT_DS_SQ =( + select(URL) + .join( + FlagURLValidated, + FlagURLValidated.url_id == URL.id + ) + .where( + URL.status == URLStatus.OK, + FlagURLValidated.type == URLValidatedType.DATA_SOURCE, + ~exists().where( + URLDataSource.url_id == URL.id + ) + ) + .subquery() +) + +VALIDATED_URLS_WITHOUT_DS_ALIAS = aliased( + URL, + VALIDATED_URLS_WITHOUT_DS_SQ +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_approved/queries/get.py b/src/core/tasks/url/operators/submit_approved/queries/get.py index 19b32b5d..16b38a82 100644 --- a/src/core/tasks/url/operators/submit_approved/queries/get.py +++ b/src/core/tasks/url/operators/submit_approved/queries/get.py @@ -3,6 +3,7 @@ from sqlalchemy.orm import selectinload from src.collectors.enums import URLStatus +from src.core.tasks.url.operators.submit_approved.queries.cte import VALIDATED_URLS_WITHOUT_DS_ALIAS from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO from src.db.models.impl.flag.url_validated.enums import URLValidatedType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated @@ -30,13 +31,11 @@ async def _process_results(self, urls): @staticmethod async def _build_query(): query = ( - select(URL) - .join(FlagURLValidated, FlagURLValidated.url_id == URL.id) - .where(FlagURLValidated.type == URLValidatedType.DATA_SOURCE) + select(VALIDATED_URLS_WITHOUT_DS_ALIAS) .options( - selectinload(URL.optional_data_source_metadata), - selectinload(URL.confirmed_agencies), - selectinload(URL.reviewing_user) + selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.optional_data_source_metadata), + selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.confirmed_agencies), + selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.reviewing_user) ).limit(100) ) return query diff --git a/src/core/tasks/url/operators/submit_approved/queries/has_validated.py b/src/core/tasks/url/operators/submit_approved/queries/has_validated.py index 5a3ff464..2cbee486 100644 --- a/src/core/tasks/url/operators/submit_approved/queries/has_validated.py +++ b/src/core/tasks/url/operators/submit_approved/queries/has_validated.py @@ -1,9 +1,8 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.core.tasks.url.operators.submit_approved.queries.cte import VALIDATED_URLS_WITHOUT_DS_ALIAS +from src.db.helpers.session import session_helper as sh from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase @@ -12,15 +11,8 @@ class HasValidatedURLsQueryBuilder(QueryBuilderBase): async def run(self, session: AsyncSession) -> bool: query = ( - select(URL) - .join( - FlagURLValidated, - FlagURLValidated.url_id == URL.id - ) - .where( - FlagURLValidated.type == URLValidatedType.DATA_SOURCE - ) + select(VALIDATED_URLS_WITHOUT_DS_ALIAS) + .limit(1) ) - urls = await session.execute(query) - urls = urls.scalars().all() - return len(urls) > 0 \ No newline at end of file + url: URL | None = await sh.one_or_none(session, query=query) + return url is not None \ No newline at end of file diff --git a/src/db/models/impl/batch/sqlalchemy.py b/src/db/models/impl/batch/sqlalchemy.py index 0e6aa611..ab345ecc 100644 --- a/src/db/models/impl/batch/sqlalchemy.py +++ b/src/db/models/impl/batch/sqlalchemy.py @@ -52,6 +52,7 @@ class Batch(WithIDBase): back_populates="batch", overlaps="url" ) - # missings = relationship("Missing", back_populates="batch") # Not in active use + # These relationships exist but are never referenced by their attributes + # missings = relationship("Missing", back_populates="batch") logs = relationship("Log", back_populates="batch") duplicates = relationship("Duplicate", back_populates="batch") diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py index f992fbb6..acb0005e 100644 --- a/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py +++ b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py @@ -49,6 +49,9 @@ async def test_submit_approved_url_task( # Check Task has been marked as completed assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message + # Check Task Operator no longer meets pre-requisites + assert not await operator.meets_task_prerequisites() + # Get URLs urls: list[URL] = await db_data_creator.adb_client.get_all(URL, order_by_attribute="id") url_1: URL = urls[0] diff --git a/tests/conftest.py b/tests/conftest.py index 35cbeb29..35a87275 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,17 +2,21 @@ import os from contextlib import contextmanager from typing import Any, Generator, AsyncGenerator -from unittest.mock import AsyncMock import pytest import pytest_asyncio from aiohttp import ClientSession from alembic.config import Config -from pdap_access_manager import AccessManager from sqlalchemy import create_engine, inspect, MetaData from sqlalchemy.orm import scoped_session, sessionmaker from src.core.env_var_manager import EnvVarManager +# Below are to prevent import errors +from src.db.models.impl.missing import Missing # noqa: F401 +from src.db.models.impl.log.sqlalchemy import Log # noqa: F401 +from src.db.models.impl.task.error import TaskError # noqa: F401 +from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate # noqa: F401 +from src.db.models.impl.url.probed_for_404 import URLProbedFor404 # noqa: F401 from src.db.client.async_ import AsyncDatabaseClient from src.db.client.sync import DatabaseClient from src.db.helpers.connect import get_postgres_connection_string