diff --git a/src/core/tasks/url/operators/validate/queries/ctes/consensus/impl/name.py b/src/core/tasks/url/operators/validate/queries/ctes/consensus/impl/name.py new file mode 100644 index 00000000..b51f77b5 --- /dev/null +++ b/src/core/tasks/url/operators/validate/queries/ctes/consensus/impl/name.py @@ -0,0 +1,23 @@ +from sqlalchemy import Column + +from src.core.tasks.url.operators.validate.queries.ctes.consensus.base import ValidationCTEContainer +from src.core.tasks.url.operators.validate.queries.ctes.consensus.helper import build_validation_query +from src.core.tasks.url.operators.validate.queries.ctes.counts.impl.name import NAME_VALIDATION_COUNTS_CTE +from src.core.tasks.url.operators.validate.queries.ctes.scored import ScoredCTEContainer + + +class NameValidationCTEContainer(ValidationCTEContainer): + + def __init__(self): + _scored = ScoredCTEContainer( + NAME_VALIDATION_COUNTS_CTE + ) + + self._query = build_validation_query( + _scored, + "name" + ) + + @property + def name(self) -> Column[int]: + return self._query.c.name \ No newline at end of file diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py new file mode 100644 index 00000000..5cb014f1 --- /dev/null +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py @@ -0,0 +1,28 @@ +from sqlalchemy import select, func + +from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer +from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.views.unvalidated_url import UnvalidatedURL + +NAME_VALIDATION_COUNTS_CTE = ValidatedCountsCTEContainer( + ( + select( + URLNameSuggestion.url_id, + URLNameSuggestion.suggestion.label("entity"), + func.count().label("votes") + ) + .join( + UnvalidatedURL, + URLNameSuggestion.url_id == UnvalidatedURL.url_id + ) + .join( + LinkUserNameSuggestion, + LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id + ) + .group_by( + URLNameSuggestion.url_id, + URLNameSuggestion.suggestion + ) + ).cte("counts_name") +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/validate/queries/get/core.py b/src/core/tasks/url/operators/validate/queries/get/core.py index f361912e..31d21f07 100644 --- a/src/core/tasks/url/operators/validate/queries/get/core.py +++ b/src/core/tasks/url/operators/validate/queries/get/core.py @@ -6,6 +6,7 @@ from src.core.exceptions import FailedValidationException from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.agency import AgencyValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.location import LocationValidationCTEContainer +from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.name import NameValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.record_type import \ RecordTypeValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.url_type import URLTypeValidationCTEContainer @@ -24,6 +25,7 @@ async def run(self, session: AsyncSession) -> list[GetURLsForAutoValidationRespo location = LocationValidationCTEContainer() url_type = URLTypeValidationCTEContainer() record_type = RecordTypeValidationCTEContainer() + name = NameValidationCTEContainer() query = ( select( @@ -32,6 +34,7 @@ async def run(self, session: AsyncSession) -> list[GetURLsForAutoValidationRespo agency.agency_id, url_type.url_type, record_type.record_type, + name.name, ) .outerjoin( agency.query, @@ -49,6 +52,10 @@ async def run(self, session: AsyncSession) -> list[GetURLsForAutoValidationRespo record_type.query, URL.id == record_type.url_id, ) + .outerjoin( + name.query, + URL.id == name.url_id, + ) ) query = add_where_condition( query, @@ -56,6 +63,7 @@ async def run(self, session: AsyncSession) -> list[GetURLsForAutoValidationRespo location=location, url_type=url_type, record_type=record_type, + name=name, ) mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) diff --git a/src/core/tasks/url/operators/validate/queries/get/models/response.py b/src/core/tasks/url/operators/validate/queries/get/models/response.py index b91dc64c..0b72610d 100644 --- a/src/core/tasks/url/operators/validate/queries/get/models/response.py +++ b/src/core/tasks/url/operators/validate/queries/get/models/response.py @@ -11,6 +11,7 @@ class GetURLsForAutoValidationResponse(BaseModel): agency_id: int | None url_type: URLType record_type: RecordType | None + name: str | None @model_validator(mode="after") def forbid_record_type_if_not_data_source(self): diff --git a/src/core/tasks/url/operators/validate/queries/helper.py b/src/core/tasks/url/operators/validate/queries/helper.py index 25128fbe..e2632ca6 100644 --- a/src/core/tasks/url/operators/validate/queries/helper.py +++ b/src/core/tasks/url/operators/validate/queries/helper.py @@ -2,6 +2,7 @@ from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.agency import AgencyValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.location import LocationValidationCTEContainer +from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.name import NameValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.record_type import \ RecordTypeValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.url_type import URLTypeValidationCTEContainer @@ -13,7 +14,8 @@ def add_where_condition( agency: AgencyValidationCTEContainer, location: LocationValidationCTEContainer, url_type: URLTypeValidationCTEContainer, - record_type: RecordTypeValidationCTEContainer + record_type: RecordTypeValidationCTEContainer, + name: NameValidationCTEContainer, ) -> Select: return ( query @@ -25,6 +27,7 @@ def add_where_condition( agency.agency_id.isnot(None), location.location_id.isnot(None), record_type.record_type.isnot(None), + name.name.isnot(None), ), and_( url_type.url_type.in_( @@ -32,6 +35,7 @@ def add_where_condition( ), agency.agency_id.isnot(None), location.location_id.isnot(None), + name.name.isnot(None), ), url_type.url_type == URLType.NOT_RELEVANT.value ), diff --git a/src/core/tasks/url/operators/validate/queries/insert.py b/src/core/tasks/url/operators/validate/queries/insert.py index 006f23cd..31bdfa74 100644 --- a/src/core/tasks/url/operators/validate/queries/insert.py +++ b/src/core/tasks/url/operators/validate/queries/insert.py @@ -1,11 +1,14 @@ from typing import Any +from sqlalchemy import update, case from sqlalchemy.ext.asyncio import AsyncSession from src.core.tasks.url.operators.validate.queries.get.models.response import GetURLsForAutoValidationResponse from src.db.models.impl.flag.auto_validated.pydantic import FlagURLAutoValidatedPydantic from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic +from src.db.models.impl.url.core.pydantic.upsert import URLUpsertModel +from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.record_type.pydantic import URLRecordTypePydantic from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh @@ -56,4 +59,27 @@ async def run(self, session: AsyncSession) -> Any: ]: await sh.bulk_insert(session, models=inserts) + await self.update_urls(session) + + async def update_urls(self, session: AsyncSession) -> Any: + id_to_name: dict[int, str] = {} + for response in self._responses: + if response.name is not None: + id_to_name[response.url_id] = response.name + + if len(id_to_name) == 0: + return + + stmt = ( + update(URL) + .where(URL.id.in_(id_to_name.keys())) + .values( + name=case( + {id_: val for id_, val in id_to_name.items()}, + value=URL.id + ) + ) + ) + + await session.execute(stmt) diff --git a/src/core/tasks/url/operators/validate/queries/prereq/core.py b/src/core/tasks/url/operators/validate/queries/prereq/core.py index 7c9a9684..6ee25e53 100644 --- a/src/core/tasks/url/operators/validate/queries/prereq/core.py +++ b/src/core/tasks/url/operators/validate/queries/prereq/core.py @@ -3,6 +3,7 @@ from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.agency import AgencyValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.location import LocationValidationCTEContainer +from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.name import NameValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.record_type import \ RecordTypeValidationCTEContainer from src.core.tasks.url.operators.validate.queries.ctes.consensus.impl.url_type import URLTypeValidationCTEContainer @@ -25,6 +26,7 @@ async def run(self, session: AsyncSession) -> bool: location = LocationValidationCTEContainer() url_type = URLTypeValidationCTEContainer() record_type = RecordTypeValidationCTEContainer() + name = NameValidationCTEContainer() query = ( @@ -50,6 +52,10 @@ async def run(self, session: AsyncSession) -> bool: record_type.query, UnvalidatedURL.url_id == record_type.url_id, ) + .outerjoin( + name.query, + UnvalidatedURL.url_id == name.url_id, + ) ) query = add_where_condition( query, @@ -57,6 +63,7 @@ async def run(self, session: AsyncSession) -> bool: location=location, url_type=url_type, record_type=record_type, + name=name, ).limit(1) return await sh.results_exist(session, query=query) diff --git a/src/db/models/impl/url/core/pydantic/upsert.py b/src/db/models/impl/url/core/pydantic/upsert.py new file mode 100644 index 00000000..8a101c70 --- /dev/null +++ b/src/db/models/impl/url/core/pydantic/upsert.py @@ -0,0 +1,18 @@ +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.templates_.base import Base +from src.db.templates.markers.bulk.upsert import BulkUpsertableModel + + +class URLUpsertModel(BulkUpsertableModel): + + @classmethod + def id_field(cls) -> str: + return "id" + + @classmethod + def sa_model(cls) -> type[Base]: + """Defines the SQLAlchemy model.""" + return URL + + id: int + name: str | None diff --git a/tests/automated/integration/tasks/url/impl/validate/helper.py b/tests/automated/integration/tasks/url/impl/validate/helper.py index 85b13695..6ab44984 100644 --- a/tests/automated/integration/tasks/url/impl/validate/helper.py +++ b/tests/automated/integration/tasks/url/impl/validate/helper.py @@ -5,7 +5,9 @@ from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource from tests.conftest import db_data_creator from tests.helpers.counter import next_int from tests.helpers.data_creator.core import DBDataCreator @@ -117,4 +119,27 @@ async def add_record_type_suggestions( url_id=self.url_id, record_type=record_type, user_id=next_int() - ) \ No newline at end of file + ) + + async def add_name_suggestion( + self, + count: int = 1, + ) -> str: + name = f"Test Validate Task Name" + suggestion_id: int = await self.db_data_creator.name_suggestion( + url_id=self.url_id, + source=NameSuggestionSource.USER, + name=name, + ) + for i in range(count): + await self.db_data_creator.user_name_endorsement( + suggestion_id=suggestion_id, + user_id=next_int(), + ) + return name + + async def check_name(self) -> None: + urls: list[URL] = await self.adb_client.get_all(URL) + assert len(urls) == 1 + url: URL = urls[0] + assert url.name == "Test Validate Task Name" \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py index 500d147c..82bed288 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py @@ -37,6 +37,10 @@ async def test_data_source( await helper.add_record_type_suggestions(count=2) + assert not await operator.meets_task_prerequisites() + + await helper.add_name_suggestion(count=2) + assert await operator.meets_task_prerequisites() # Add different record type suggestion @@ -59,4 +63,5 @@ async def test_data_source( await helper.check_auto_validated() await helper.check_agency_linked() await helper.check_record_type() + await helper.check_name() diff --git a/tests/automated/integration/tasks/url/impl/validate/test_individual_record.py b/tests/automated/integration/tasks/url/impl/validate/test_individual_record.py index 664b52d4..19d025df 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_individual_record.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_individual_record.py @@ -28,6 +28,10 @@ async def test_individual_record( await helper.add_location_suggestions(count=2) + assert not await operator.meets_task_prerequisites() + + await helper.add_name_suggestion(count=2) + assert await operator.meets_task_prerequisites() # Add additional agency suggestions to create tie @@ -50,4 +54,5 @@ async def test_individual_record( await helper.check_url_validated(URLType.INDIVIDUAL_RECORD) await helper.check_auto_validated() await helper.check_agency_linked() + await helper.check_name() diff --git a/tests/automated/integration/tasks/url/impl/validate/test_meta_url.py b/tests/automated/integration/tasks/url/impl/validate/test_meta_url.py index be88157f..962a2b63 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_meta_url.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_meta_url.py @@ -35,6 +35,10 @@ async def test_meta_url( # Add two location suggestions await helper.add_location_suggestions(count=2) + assert not await operator.meets_task_prerequisites() + + await helper.add_name_suggestion(count=2) + # Assert operator now meets task prerequisites assert await operator.meets_task_prerequisites() @@ -58,3 +62,4 @@ async def test_meta_url( await helper.check_url_validated(URLType.META_URL) await helper.check_auto_validated() await helper.check_agency_linked() + await helper.check_name() diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 5fb700b7..b8cc936b 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -20,6 +20,7 @@ from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL +from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML @@ -684,10 +685,24 @@ async def name_suggestion( self, url_id: int, source: NameSuggestionSource = NameSuggestionSource.HTML_METADATA_TITLE, + name: str | None = None, ) -> int: + if name is None: + name = f"Test Name {next_int()}" suggestion = URLNameSuggestion( url_id=url_id, source=source, - suggestion=f"Test Name {next_int()}", + suggestion=name, ) return await self.adb_client.add(suggestion, return_id=True) + + async def user_name_endorsement( + self, + suggestion_id: int, + user_id: int, + ): + link = LinkUserNameSuggestion( + suggestion_id=suggestion_id, + user_id=user_id, + ) + await self.adb_client.add(link)