diff --git a/src/api/endpoints/annotate/all/get/models/name.py b/src/api/endpoints/annotate/all/get/models/name.py
new file mode 100644
index 00000000..80857305
--- /dev/null
+++ b/src/api/endpoints/annotate/all/get/models/name.py
@@ -0,0 +1,12 @@
+from pydantic import BaseModel
+
+
+class NameAnnotationSuggestion(BaseModel):
+    """A candidate name for a URL, with the number of users endorsing it."""
+
+    # Text of the suggested name.
+    name: str
+    # ID of the underlying suggestion row (what `existing_name_id` refers to on POST).
+    suggestion_id: int
+    # Number of users linked to this suggestion; 0 when nobody has endorsed it.
+    endorsement_count: int
diff --git a/src/api/endpoints/annotate/all/get/models/response.py b/src/api/endpoints/annotate/all/get/models/response.py
index 0c584495..ac444e5a 100644
--- a/src/api/endpoints/annotate/all/get/models/response.py
+++ b/src/api/endpoints/annotate/all/get/models/response.py
@@ -4,6 +4,7 @@
 
 from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo
 from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo
+from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion
 from src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase
 from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo
 from src.core.enums import RecordType
@@ -22,6 +23,9 @@ class GetNextURLForAllAnnotationInnerResponse(AnnotationInnerResponseInfoBase):
     suggested_record_type: RecordType | None = Field(
         title="What record type, if any, the auto-labeler identified the URL as"
     )
+    name_suggestions: list[NameAnnotationSuggestion] | None = Field(
+        title="User and Auto-Suggestions for names"
+    )
 
 
 class GetNextURLForAllAnnotationResponse(BaseModel):
diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py
index fdc7beee..da859135 100644
--- a/src/api/endpoints/annotate/all/get/queries/core.py
+++ b/src/api/endpoints/annotate/all/get/queries/core.py
@@ -6,9 +6,11 @@
 from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo
 from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.core import GetAgencySuggestionsQueryBuilder
 from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo
+from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion
 from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse, \
     GetNextURLForAllAnnotationInnerResponse
 from src.api.endpoints.annotate.all.get.queries.location_.core import GetLocationSuggestionsQueryBuilder
+from src.api.endpoints.annotate.all.get.queries.name.core import GetNameSuggestionsQueryBuilder
 from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo
 from src.collectors.enums import URLStatus
 from src.db.dto_converter import DTOConverter
@@ -133,6 +135,8 @@ async def run(
             await GetAgencySuggestionsQueryBuilder(url_id=url.id).run(session)
         location_suggestions: LocationAnnotationResponseOuterInfo = \
             await GetLocationSuggestionsQueryBuilder(url_id=url.id).run(session)
+        name_suggestions: list[NameAnnotationSuggestion] = \
+            await GetNameSuggestionsQueryBuilder(url_id=url.id).run(session)
 
         return GetNextURLForAllAnnotationResponse(
             next_annotation=GetNextURLForAllAnnotationInnerResponse(
@@ -155,5 +159,6 @@ async def run(
                     ]
                 ).run(session),
                 location_suggestions=location_suggestions,
+                name_suggestions=name_suggestions
             )
         )
\ No newline at end of file
diff --git a/src/api/endpoints/annotate/all/get/queries/name/__init__.py b/src/api/endpoints/annotate/all/get/queries/name/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/api/endpoints/annotate/all/get/queries/name/core.py b/src/api/endpoints/annotate/all/get/queries/name/core.py
new file mode 100644
index 00000000..b048cb2c
--- /dev/null
+++ b/src/api/endpoints/annotate/all/get/queries/name/core.py
@@ -0,0 +1,63 @@
+from typing import Sequence
+
+from sqlalchemy import select, func, RowMapping
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion
+from src.db.helpers.session import session_helper as sh
+from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion
+from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion
+from src.db.queries.base.builder import QueryBuilderBase
+
+
+class GetNameSuggestionsQueryBuilder(QueryBuilderBase):
+    """Fetch the most-endorsed name suggestions for a single URL."""
+
+    def __init__(
+        self,
+        url_id: int,
+        limit: int = 3,
+    ):
+        """
+        :param url_id: ID of the URL whose name suggestions are fetched.
+        :param limit: Maximum number of suggestions to return.
+        """
+        super().__init__()
+        self.url_id = url_id
+        self.limit = limit
+
+    async def run(self, session: AsyncSession) -> list[NameAnnotationSuggestion]:
+        """Return up to `limit` suggestions, most-endorsed first.
+
+        Suggestions nobody has endorsed are still returned, with a count of 0;
+        ties are broken by ascending suggestion ID.
+        """
+        # Counting the link's user_id (not `*`) skips the NULLs produced by the
+        # outer join, so an unendorsed suggestion counts as 0 rather than 1.
+        endorsement_count = func.count(LinkUserNameSuggestion.user_id)
+        query = (
+            select(
+                URLNameSuggestion.id.label('suggestion_id'),
+                URLNameSuggestion.suggestion.label('name'),
+                endorsement_count.label('endorsement_count'),
+            )
+            .outerjoin(
+                LinkUserNameSuggestion,
+                LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id,
+            )
+            .where(
+                URLNameSuggestion.url_id == self.url_id,
+            )
+            .group_by(
+                URLNameSuggestion.id,
+                URLNameSuggestion.suggestion,
+            )
+            .order_by(
+                endorsement_count.desc(),
+                URLNameSuggestion.id.asc(),
+            )
+            .limit(self.limit)
+        )
+
+        mappings: Sequence[RowMapping] = await sh.mappings(session, query=query)
+        return [NameAnnotationSuggestion(**mapping) for mapping in mappings]
diff --git a/src/api/endpoints/annotate/all/post/models/name.py b/src/api/endpoints/annotate/all/post/models/name.py
new file mode 100644
index 00000000..9d71431e
--- /dev/null
+++ b/src/api/endpoints/annotate/all/post/models/name.py
@@ -0,0 +1,23 @@
+from pydantic import BaseModel, field_validator
+
+
+class AnnotationPostNameInfo(BaseModel):
+    """Name part of an annotation: a new name, or an existing suggestion to endorse."""
+
+    # Free-text name to record as a new suggestion.
+    new_name: str | None = None
+    # ID of an existing name suggestion to endorse.
+    existing_name_id: int | None = None
+
+    @field_validator("new_name")
+    @classmethod
+    def blank_name_to_none(cls, value: str | None) -> str | None:
+        """Strip surrounding whitespace and treat a blank name as not provided."""
+        if value is None:
+            return None
+        return value.strip() or None
+
+    @property
+    def empty(self) -> bool:
+        """Whether neither a new name nor an existing suggestion was supplied."""
+        return self.new_name is None and self.existing_name_id is None
diff --git a/src/api/endpoints/annotate/all/post/models/request.py b/src/api/endpoints/annotate/all/post/models/request.py
index 13207d4f..3480f346 100644
--- a/src/api/endpoints/annotate/all/post/models/request.py
+++ b/src/api/endpoints/annotate/all/post/models/request.py
@@ -1,5 +1,6 @@
 from pydantic import BaseModel, model_validator
 
+from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo
 from src.core.enums import RecordType
 from src.core.exceptions import FailedValidationException
 from src.db.models.impl.flag.url_validated.enums import URLType
@@ -10,6 +11,7 @@ class AllAnnotationPostInfo(BaseModel):
     record_type: RecordType | None = None
     agency_ids: list[int]
     location_ids: list[int]
+    name_info: AnnotationPostNameInfo = AnnotationPostNameInfo()
 
     @model_validator(mode="after")
     def forbid_record_type_if_meta_url_or_individual_record(self):
diff --git a/src/api/endpoints/annotate/all/post/query.py b/src/api/endpoints/annotate/all/post/query.py
index 85861fee..e6186790 100644
--- a/src/api/endpoints/annotate/all/post/query.py
+++ b/src/api/endpoints/annotate/all/post/query.py
@@ -1,6 +1,7 @@
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo
+from src.api.endpoints.annotate.all.post.requester import AddAllAnnotationsToURLRequester
 from src.db.models.impl.flag.url_validated.enums import URLType
 from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion
 from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
@@ -24,40 +25,24 @@ def __init__(
 
 
     async def run(self, session: AsyncSession) -> None:
-        # Add relevant annotation
-        relevant_suggestion = UserURLTypeSuggestion(
+        requester = AddAllAnnotationsToURLRequester(
+            session=session,
             url_id=self.url_id,
-            user_id=self.user_id,
-            type=self.post_info.suggested_status
+            user_id=self.user_id
         )
-        session.add(relevant_suggestion)
+
+        # Add relevant annotation
+        requester.add_relevant_annotation(self.post_info.suggested_status)
 
         # If not relevant, do nothing else
         if self.post_info.suggested_status == URLType.NOT_RELEVANT:
             return
 
-        locations: list[UserLocationSuggestion] = []
-        for location_id in self.post_info.location_ids:
-            locations.append(UserLocationSuggestion(
-                url_id=self.url_id,
-                user_id=self.user_id,
-                location_id=location_id
-            ))
-        session.add_all(locations)
+        requester.add_location_ids(self.post_info.location_ids)
 
         # TODO (TEST): Add test for submitting Meta URL validation
-        if self.post_info.record_type is not None:
-            record_type_suggestion = UserRecordTypeSuggestion(
-                url_id=self.url_id,
-                user_id=self.user_id,
-                record_type=self.post_info.record_type.value
-            )
-            session.add(record_type_suggestion)
-
-        for agency_id in self.post_info.agency_ids:
-            agency_suggestion = UserUrlAgencySuggestion(
-                url_id=self.url_id,
-                user_id=self.user_id,
-                agency_id=agency_id,
-            )
-            session.add(agency_suggestion)
+        requester.optionally_add_record_type(self.post_info.record_type)
+
+        requester.add_agency_ids(self.post_info.agency_ids)
+
+        await requester.optionally_add_name_suggestion(self.post_info.name_info)
diff --git a/src/api/endpoints/annotate/all/post/requester.py b/src/api/endpoints/annotate/all/post/requester.py
new file mode 100644
index 00000000..44f0e0f7
--- /dev/null
+++ b/src/api/endpoints/annotate/all/post/requester.py
@@ -0,0 +1,111 @@
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo
+from src.core.enums import RecordType
+from src.db.models.impl.flag.url_validated.enums import URLType
+from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion
+from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion
+from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
+from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource
+from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion
+from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion
+from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion
+from src.db.templates.requester import RequesterBase
+
+
+class AddAllAnnotationsToURLRequester(RequesterBase):
+    """Stage one user's annotations for one URL on the session."""
+
+    def __init__(
+        self,
+        session: AsyncSession,
+        url_id: int,
+        user_id: int,
+    ):
+        """
+        :param session: Session the new rows are added to.
+        :param url_id: ID of the URL being annotated.
+        :param user_id: ID of the annotating user.
+        """
+        super().__init__(session=session)
+        self.url_id = url_id
+        self.user_id = user_id
+
+    def optionally_add_record_type(
+        self,
+        rt: RecordType | None,
+    ) -> None:
+        """Add the user's record type suggestion; do nothing if `rt` is None."""
+        if rt is None:
+            return
+        self.session.add(
+            UserRecordTypeSuggestion(
+                url_id=self.url_id,
+                user_id=self.user_id,
+                record_type=rt.value,
+            )
+        )
+
+    def add_relevant_annotation(
+        self,
+        url_type: URLType,
+    ) -> None:
+        """Add the user's URL type suggestion."""
+        self.session.add(
+            UserURLTypeSuggestion(
+                url_id=self.url_id,
+                user_id=self.user_id,
+                type=url_type,
+            )
+        )
+
+    def add_agency_ids(self, agency_ids: list[int]) -> None:
+        """Add one agency suggestion per ID in `agency_ids`."""
+        self.session.add_all([
+            UserUrlAgencySuggestion(
+                url_id=self.url_id,
+                user_id=self.user_id,
+                agency_id=agency_id,
+            )
+            for agency_id in agency_ids
+        ])
+
+    def add_location_ids(self, location_ids: list[int]) -> None:
+        """Add one location suggestion per ID in `location_ids`."""
+        self.session.add_all([
+            UserLocationSuggestion(
+                url_id=self.url_id,
+                user_id=self.user_id,
+                location_id=location_id,
+            )
+            for location_id in location_ids
+        ])
+
+    async def optionally_add_name_suggestion(
+        self,
+        name_info: AnnotationPostNameInfo
+    ) -> None:
+        """Endorse an existing name suggestion, or create a new one and endorse it.
+
+        Does nothing when `name_info` is empty. When both fields are set,
+        `existing_name_id` wins and `new_name` is ignored.
+        """
+        if name_info.empty:
+            return
+        suggestion_id = name_info.existing_name_id
+        if suggestion_id is None:
+            name_suggestion = URLNameSuggestion(
+                url_id=self.url_id,
+                suggestion=name_info.new_name,
+                source=NameSuggestionSource.USER,
+            )
+            self.session.add(name_suggestion)
+            # Flush so the new row's ID is available for the link below.
+            await self.session.flush()
+            suggestion_id = name_suggestion.id
+        self.session.add(
+            LinkUserNameSuggestion(
+                user_id=self.user_id,
+                suggestion_id=suggestion_id,
+            )
+        )
diff --git a/src/core/core.py b/src/core/core.py
index cd2b9be2..0af67665 100644
--- a/src/core/core.py
+++ b/src/core/core.py
@@ -3,9 +3,7 @@
 
 from fastapi import HTTPException
 from pydantic import BaseModel
-from sqlalchemy.exc import IntegrityError
 
-from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo
 from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse
 from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo
 from src.api.endpoints.annotate.all.post.query import AddAllAnnotationsToURLQueryBuilder
@@ -35,8 +33,7 @@
 from src.api.endpoints.url.get.dto import GetURLsResponseInfo
 from src.collectors.enums import CollectorType
 from src.collectors.manager import AsyncCollectorManager
-from src.core.enums import BatchStatus, RecordType, AnnotationType
-from src.core.error_manager.core import ErrorManager
+from src.core.enums import BatchStatus
 from src.core.tasks.url.manager import TaskManager
 from src.db.client.async_ import AsyncDatabaseClient
 from src.db.enums import TaskType
diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py
index fec9de54..1e6d76a6 100644
--- a/src/db/models/impl/url/core/sqlalchemy.py
+++ b/src/db/models/impl/url/core/sqlalchemy.py
@@ -9,6 +9,7 @@
 from src.db.models.impl.url.probed_for_404 import URLProbedFor404
 from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType
 from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask
+from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion
 from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin
 from src.db.models.templates_.with_id import WithIDBase
 
@@ -60,6 +61,9 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase):
     auto_location_subtasks = relationship(
         AutoLocationIDSubtask
     )
+    name_suggestions = relationship(
+        URLNameSuggestion
+    )
     user_agency_suggestions = relationship(
         "UserUrlAgencySuggestion", back_populates="url")
     auto_record_type_suggestion = relationship(
diff --git a/src/db/models/impl/url/suggestion/name/sqlalchemy.py b/src/db/models/impl/url/suggestion/name/sqlalchemy.py
index d06d7305..2f11542d 100644
--- a/src/db/models/impl/url/suggestion/name/sqlalchemy.py
+++ b/src/db/models/impl/url/suggestion/name/sqlalchemy.py
@@ -1,4 +1,5 @@
 from sqlalchemy import Column, String
+from sqlalchemy.orm import Mapped
 
 from src.db.models.helpers import enum_column
 from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH
@@ -16,7 +17,7 @@ class URLNameSuggestion(
     __tablename__ = "url_name_suggestions"
 
     suggestion = Column(String(MAX_SUGGESTION_LENGTH), nullable=False)
-    source = enum_column(
+    source: Mapped[NameSuggestionSource] = enum_column(
         NameSuggestionSource,
         name="suggestion_source_enum"
     )
\ No newline at end of file
diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py
index f3f17126..a7183f17 100644
--- a/tests/automated/integration/api/annotate/all/test_happy_path.py
+++ b/tests/automated/integration/api/annotate/all/test_happy_path.py
@@ -3,11 +3,13 @@
 from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion
 from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse
 from src.api.endpoints.annotate.all.get.queries.core import GetNextURLForAllAnnotationQueryBuilder
+from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo
 from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo
 from src.core.enums import RecordType
 from src.db.models.impl.flag.url_validated.enums import URLType
 from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion
 from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
+from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion
 from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion
 from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion
 from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo
@@ -41,6 +43,10 @@ async def test_annotate_all(
     # Get a valid URL to annotate
     get_response_1 = await ath.request_validator.get_next_url_for_all_annotations()
     assert get_response_1.next_annotation is not None
+    assert len(get_response_1.next_annotation.name_suggestions) == 1
+    name_suggestion = get_response_1.next_annotation.name_suggestions[0]
+    assert name_suggestion.name is not None
+    assert name_suggestion.endorsement_count == 0
 
     # Apply the second batch id as a filter and see that a different URL is returned
     get_response_2 = await ath.request_validator.get_next_url_for_all_annotations(
@@ -61,7 +67,10 @@ async def test_annotate_all(
             location_ids=[
                 california.location_id,
                 pennsylvania.location_id,
-            ]
+            ],
+            name_info=AnnotationPostNameInfo(
+                new_name="New Name"
+            )
         )
     )
     assert post_response_1.next_annotation is not None
@@ -75,7 +84,10 @@ async def test_annotate_all(
         all_annotations_post_info=AllAnnotationPostInfo(
             suggested_status=URLType.NOT_RELEVANT,
             location_ids=[],
-            agency_ids=[]
+            agency_ids=[],
+            name_info=AnnotationPostNameInfo(
+                existing_name_id=setup_info_2.name_suggestion_id
+            )
         )
     )
     assert post_response_2.next_annotation is None
@@ -136,4 +148,9 @@ async def test_annotate_all(
     for user_suggestion in user_suggestions:
         assert user_suggestion.user_count == 1
 
+    # Confirm 3 name suggestions
+    name_suggestions: list[URLNameSuggestion] = await adb_client.get_all(URLNameSuggestion)
+    assert len(name_suggestions) == 3
+    suggested_names: set[str] = {name_suggestion.suggestion for name_suggestion in name_suggestions}
+    assert "New Name" in suggested_names
 
diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py
index 17032b60..8a2c7ef5 100644
--- a/tests/helpers/data_creator/core.py
+++ b/tests/helpers/data_creator/core.py
@@ -26,6 +26,8 @@
 from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask
 from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion
 from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
+from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource
+from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion
 from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata
 from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters
 from tests.helpers.batch_creation_parameters.enums import URLCreationEnum
@@ -673,4 +675,17 @@ async def link_agencies_to_location(
             )
             for agency_id in agency_ids
         ]
-        await self.adb_client.add_all(links)
\ No newline at end of file
+        await self.adb_client.add_all(links)
+
+    async def name_suggestion(
+        self,
+        url_id: int,
+        source: NameSuggestionSource = NameSuggestionSource.HTML_METADATA_TITLE,
+    ) -> int:
+        """Add a name suggestion with a generated name for the URL; return its ID."""
+        suggestion = URLNameSuggestion(
+            url_id=url_id,
+            source=source,
+            suggestion=f"Test Name {next_int()}",
+        )
+        return await self.adb_client.add(suggestion, return_id=True)
diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py
index b3841b37..ababae82 100644
--- a/tests/helpers/setup/final_review/core.py
+++ b/tests/helpers/setup/final_review/core.py
@@ -60,6 +60,10 @@ async def add_relevant_suggestion(relevant: bool):
             record_type=RecordType.ARREST_RECORDS
         )
 
+    name_suggestion_id: int = await db_data_creator.name_suggestion(
+        url_id=url_mapping.url_id,
+    )
+
     if include_user_annotations:
         await add_relevant_suggestion(False)
         await add_record_type_suggestion(RecordType.ACCIDENT_REPORTS)
@@ -70,5 +74,6 @@ async def add_relevant_suggestion(relevant: bool):
     return FinalReviewSetupInfo(
         batch_id=batch_id,
         url_mapping=url_mapping,
-        user_agency_id=user_agency_id
+        user_agency_id=user_agency_id,
+        name_suggestion_id=name_suggestion_id
     )
diff --git a/tests/helpers/setup/final_review/model.py b/tests/helpers/setup/final_review/model.py
index c75fb847..a3e57a3c 100644
--- a/tests/helpers/setup/final_review/model.py
+++ b/tests/helpers/setup/final_review/model.py
@@ -8,4 +8,5 @@ class FinalReviewSetupInfo(BaseModel):
 
     batch_id: int
     url_mapping: URLMapping
-    user_agency_id: Optional[int]
+    user_agency_id: int | None
+    name_suggestion_id: int | None