From a6f27db2874ffabccc24484c616824b48a0ace75 Mon Sep 17 00:00:00 2001 From: maxachis Date: Mon, 1 Dec 2025 12:55:32 -0500 Subject: [PATCH 1/2] Begin draft --- .../annotate/all/get/models/agency.py | 18 ++- .../annotate/all/get/models/location.py | 20 +++- .../annotate/all/get/queries/agency/core.py | 1 + .../all/get/queries/location_/core.py | 3 + .../all/get/queries/location_/requester.py | 110 +++++++++++++++++- src/db/models/impl/url/core/sqlalchemy.py | 26 ++++- .../suggestion/location/user/sqlalchemy.py | 5 +- src/db/templates/requester.py | 7 ++ 8 files changed, 176 insertions(+), 14 deletions(-) diff --git a/src/api/endpoints/annotate/all/get/models/agency.py b/src/api/endpoints/annotate/all/get/models/agency.py index 45806d98..2c685e6e 100644 --- a/src/api/endpoints/annotate/all/get/models/agency.py +++ b/src/api/endpoints/annotate/all/get/models/agency.py @@ -1,6 +1,16 @@ from pydantic import BaseModel, Field +class AgencyAnnotationSuggestion(BaseModel): + agency_id: int + agency_name: str + user_count: int + robo_confidence: int | None = Field( + description="The robo labeler's given confidence for its suggestion. Null if no robo-label occurred.", + ge=0, + le=100, + ) +# TODO: Replace Usages and Delete class AgencyAnnotationAutoSuggestion(BaseModel): agency_id: int agency_name: str @@ -10,11 +20,13 @@ class AgencyAnnotationAutoSuggestion(BaseModel): le=100, ) +# TODO: Replace Usages and Delete class AgencyAnnotationUserSuggestion(BaseModel): agency_id: int agency_name: str user_count: int +# TODO: Replace Usages and Delete class AgencyAnnotationUserSuggestionOuterInfo(BaseModel): suggestions: list[AgencyAnnotationUserSuggestion] not_found_count: int = Field( @@ -23,5 +35,7 @@ class AgencyAnnotationUserSuggestionOuterInfo(BaseModel): ) class AgencyAnnotationResponseOuterInfo(BaseModel): - user: AgencyAnnotationUserSuggestionOuterInfo - auto: list[AgencyAnnotationAutoSuggestion] \ No newline at end of file + suggestions: list[AgencyAnnotationSuggestion] + not_found_count: int = Field( + description="How many users indicated the agency could not be found." + ) diff --git a/src/api/endpoints/annotate/all/get/models/location.py b/src/api/endpoints/annotate/all/get/models/location.py index fb467004..4660ee52 100644 --- a/src/api/endpoints/annotate/all/get/models/location.py +++ b/src/api/endpoints/annotate/all/get/models/location.py @@ -1,6 +1,17 @@ from pydantic import BaseModel, Field +class LocationAnnotationSuggestion(BaseModel): + location_id: int + location_name: str + user_count: int + robo_confidence: int | None = Field( + description="The robo labeler's given confidence for its suggestion. Null if no robo-label occurred.", + ge=0, + le=100, + ) + +# TODO: Replace Usages and Delete class LocationAnnotationAutoSuggestion(BaseModel): location_id: int location_name: str = Field( @@ -12,7 +23,7 @@ class LocationAnnotationAutoSuggestion(BaseModel): le=100, ) - +# TODO: Replace Usages and Delete class LocationAnnotationUserSuggestion(BaseModel): location_id: int location_name: str = Field( @@ -23,6 +34,7 @@ class LocationAnnotationUserSuggestion(BaseModel): ge=1, ) +# TODO: Replace Usages and Delete class LocationAnnotationUserSuggestionOuterInfo(BaseModel): suggestions: list[LocationAnnotationUserSuggestion] not_found_count: int = Field( @@ -31,5 +43,7 @@ class LocationAnnotationUserSuggestionOuterInfo(BaseModel): ) class LocationAnnotationResponseOuterInfo(BaseModel): - user: LocationAnnotationUserSuggestionOuterInfo - auto: list[LocationAnnotationAutoSuggestion] \ No newline at end of file + suggestions: list[LocationAnnotationSuggestion] + not_found_count: int = Field( + description="How many users indicated the location could not be found." + ) \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/agency/core.py b/src/api/endpoints/annotate/all/get/queries/agency/core.py index 28cfbd2d..d3502b96 100644 --- a/src/api/endpoints/annotate/all/get/queries/agency/core.py +++ b/src/api/endpoints/annotate/all/get/queries/agency/core.py @@ -30,6 +30,7 @@ async def run(self, session: AsyncSession) -> AgencyAnnotationResponseOuterInfo: location_id=self.location_id ) + # TODO: Pull both in single query user_suggestions: list[AgencyAnnotationUserSuggestion] = \ await requester.get_user_agency_suggestions() auto_suggestions: list[AgencyAnnotationAutoSuggestion] = \ diff --git a/src/api/endpoints/annotate/all/get/queries/location_/core.py b/src/api/endpoints/annotate/all/get/queries/location_/core.py index 85db523c..3ef0fb99 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/core.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/core.py @@ -24,6 +24,9 @@ def __init__( async def run(self, session: AsyncSession) -> LocationAnnotationResponseOuterInfo: requester = GetLocationSuggestionsRequester(session) + + # TODO: Pull both in single query + suggestions user_suggestions: list[LocationAnnotationUserSuggestion] = \ await requester.get_user_location_suggestions(self.url_id) auto_suggestions: list[LocationAnnotationAutoSuggestion] = \ diff --git a/src/api/endpoints/annotate/all/get/queries/location_/requester.py b/src/api/endpoints/annotate/all/get/queries/location_/requester.py index 6ad56c56..abae28ee 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/requester.py @@ -1,9 +1,11 @@ from typing import Sequence -from sqlalchemy import select, func, RowMapping +from sqlalchemy import select, func, RowMapping, or_, and_ from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion, \ - LocationAnnotationAutoSuggestion + LocationAnnotationAutoSuggestion, LocationAnnotationSuggestion +from src.db.helpers.query import exists_url +from src.db.helpers.session import session_helper as sh from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion @@ -11,10 +13,112 @@ from src.db.models.views.location_expanded import LocationExpandedView from src.db.templates.requester import RequesterBase -from src.db.helpers.session import session_helper as sh class GetLocationSuggestionsRequester(RequesterBase): + async def get_location_suggestions(self, url_id: int) -> list[LocationAnnotationSuggestion]: + # All locations with either a user or robo annotation + valid_locations_cte = ( + select( + LocationExpandedView.id, + ) + .where( + or_( + exists_url( + UserLocationSuggestion + ), + exists_url( + AutoLocationIDSubtask + ) + ) + ) + .cte("valid_locations") + ) + # Number of users who suggested each location + user_suggestions_cte = ( + select( + UserLocationSuggestion.url_id, + LocationExpandedView.id, + func.count(UserLocationSuggestion.user_id).label('user_count') + ) + .outerjoin( + LocationExpandedView, + LocationExpandedView.id == UserLocationSuggestion.location_id + ) + .group_by( + UserLocationSuggestion.location_id, + UserLocationSuggestion.url_id, + ) + .cte("user_suggestions") + ) + # Maximum confidence of robo annotation, if any + robo_suggestions_cte = ( + select( + AutoLocationIDSubtask.url_id, + LocationExpandedView.id, + func.max(LocationIDSubtaskSuggestion.confidence).label('robo_confidence') + ) + .outerjoin( + LocationExpandedView, + LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id + ) + .join( + AutoLocationIDSubtask, + AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id + ) + .group_by( + LocationExpandedView.id, + AutoLocationIDSubtask.url_id, + ) + .cte("robo_suggestions") + ) + # Join user and robo suggestions + joined_suggestions_query = ( + select( + valid_locations_cte.c.id.label("location_id"), + LocationExpandedView.full_display_name.label("location_name"), + user_suggestions_cte.c.user_count, + robo_suggestions_cte.c.robo_confidence, + ) + .join( + LocationExpandedView, + LocationExpandedView.id == valid_locations_cte.c.id + ) + .outerjoin( + user_suggestions_cte, + and_( + user_suggestions_cte.c.url_id == url_id, + user_suggestions_cte.c.location_id == LocationExpandedView.id + ) + ) + .outerjoin( + robo_suggestions_cte, + and_( + robo_suggestions_cte.c.url_id == url_id, + robo_suggestions_cte.c.location_id == LocationExpandedView.id + ) + ) + ) + + mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query) + suggestions: list[LocationAnnotationSuggestion] = [ + LocationAnnotationSuggestion( + **mapping + ) + for mapping in mappings + ] + return suggestions + + async def get_location_not_found_suggestions(self, url_id: int ) -> int: + query = ( + select( + func.count(LinkUserSuggestionLocationNotFound.user_id) + ) + .where( + LinkUserSuggestionLocationNotFound.url_id == url_id + ) + ) + return await self.scalar(query) async def get_user_location_suggestions(self, url_id: int) -> list[LocationAnnotationUserSuggestion]: query = ( diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index 8ee51a43..de4af177 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -5,11 +5,13 @@ from src.collectors.enums import URLStatus from src.db.models.helpers import enum_column +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask +from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin @@ -85,25 +87,39 @@ def full_url(cls): secondary="link_tasks__urls", back_populates="urls", ) - auto_agency_subtasks = relationship( - "URLAutoAgencyIDSubtask" + + + name_suggestions = relationship( + URLNameSuggestion + ) + # Location + user_location_suggestions = relationship( + UserLocationSuggestion + ) + user_location_suggestion_not_found = relationship( + LinkUserSuggestionLocationNotFound ) auto_location_subtasks = relationship( AutoLocationIDSubtask ) - name_suggestions = relationship( - URLNameSuggestion - ) + + # Agency user_agency_suggestions = relationship( "UserURLAgencySuggestion", back_populates="url") + auto_agency_subtasks = relationship( + "URLAutoAgencyIDSubtask" + ) + # Record Type auto_record_type_suggestion = relationship( "AutoRecordTypeSuggestion", uselist=False, back_populates="url") user_record_type_suggestions = relationship( "UserRecordTypeSuggestion", back_populates="url") + # Relvant/URL Type auto_relevant_suggestion = relationship( "AutoRelevantSuggestion", uselist=False, back_populates="url") user_relevant_suggestions = relationship( "UserURLTypeSuggestion", back_populates="url") + reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") optional_data_source_metadata = relationship( diff --git a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py b/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py index a9d4ae8b..18ac3851 100644 --- a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py @@ -1,5 +1,7 @@ from sqlalchemy import Integer, Column, PrimaryKeyConstraint +from sqlalchemy.orm import relationship +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.mixins import CreatedAtMixin, URLDependentMixin, LocationDependentMixin from src.db.models.templates_.base import Base @@ -18,4 +20,5 @@ class UserLocationSuggestion( user_id = Column( Integer, nullable=False, - ) \ No newline at end of file + ) + diff --git a/src/db/templates/requester.py b/src/db/templates/requester.py index b56af87f..9588ea9d 100644 --- a/src/db/templates/requester.py +++ b/src/db/templates/requester.py @@ -4,6 +4,7 @@ """ from abc import ABC +from sqlalchemy import Select from sqlalchemy.ext.asyncio import AsyncSession import src.db.helpers.session.session_helper as sh @@ -16,5 +17,11 @@ def __init__(self, session: AsyncSession): self.session = session self.session_helper = sh + async def scalar(self, query: Select): + return await sh.scalar(self.session, query=query) + + async def mappings(self, query: Select): + return await sh.mappings(self.session, query=query) + async def run_query_builder(self, query_builder: QueryBuilderBase): return await query_builder.run(session=self.session) \ No newline at end of file From 6981bf939704a956990e8fa89e9c585ebd86fb2d Mon Sep 17 00:00:00 2001 From: Max Chis Date: Mon, 1 Dec 2025 14:51:35 -0500 Subject: [PATCH 2/2] Update annotations to join user and robo suggestions for locations and agencies --- .../annotate/all/get/models/agency.py | 24 --- .../annotate/all/get/models/location.py | 31 ---- .../annotate/all/get/queries/agency/core.py | 24 +-- .../all/get/queries/agency/requester.py | 148 +++++++++--------- .../all/get/queries/location_/core.py | 27 +--- .../all/get/queries/location_/requester.py | 92 +---------- .../agency/suggestion/sqlalchemy.py | 1 - .../api/annotate/all/test_happy_path.py | 23 ++- 8 files changed, 105 insertions(+), 265 deletions(-) diff --git a/src/api/endpoints/annotate/all/get/models/agency.py b/src/api/endpoints/annotate/all/get/models/agency.py index 2c685e6e..fc568af3 100644 --- a/src/api/endpoints/annotate/all/get/models/agency.py +++ b/src/api/endpoints/annotate/all/get/models/agency.py @@ -10,30 +10,6 @@ class AgencyAnnotationSuggestion(BaseModel): le=100, ) -# TODO: Replace Usages and Delete -class AgencyAnnotationAutoSuggestion(BaseModel): - agency_id: int - agency_name: str - confidence: int = Field( - title="The confidence of the location", - ge=0, - le=100, - ) - -# TODO: Replace Usages and Delete -class AgencyAnnotationUserSuggestion(BaseModel): - agency_id: int - agency_name: str - user_count: int - -# TODO: Replace Usages and Delete -class AgencyAnnotationUserSuggestionOuterInfo(BaseModel): - suggestions: list[AgencyAnnotationUserSuggestion] - not_found_count: int = Field( - title="How many users listed the agency as not found.", - ge=0, - ) - class AgencyAnnotationResponseOuterInfo(BaseModel): suggestions: list[AgencyAnnotationSuggestion] not_found_count: int = Field( diff --git a/src/api/endpoints/annotate/all/get/models/location.py b/src/api/endpoints/annotate/all/get/models/location.py index 4660ee52..0100bbc4 100644 --- a/src/api/endpoints/annotate/all/get/models/location.py +++ b/src/api/endpoints/annotate/all/get/models/location.py @@ -11,37 +11,6 @@ class LocationAnnotationSuggestion(BaseModel): le=100, ) -# TODO: Replace Usages and Delete -class LocationAnnotationAutoSuggestion(BaseModel): - location_id: int - location_name: str = Field( - title="The full name of the location" - ) - confidence: int = Field( - title="The confidence of the location", - ge=0, - le=100, - ) - -# TODO: Replace Usages and Delete -class LocationAnnotationUserSuggestion(BaseModel): - location_id: int - location_name: str = Field( - title="The full name of the location" - ) - user_count: int = Field( - title="The number of users who suggested this location", - ge=1, - ) - -# TODO: Replace Usages and Delete -class LocationAnnotationUserSuggestionOuterInfo(BaseModel): - suggestions: list[LocationAnnotationUserSuggestion] - not_found_count: int = Field( - title="How many users listed the location as not found.", - ge=0, - ) - class LocationAnnotationResponseOuterInfo(BaseModel): suggestions: list[LocationAnnotationSuggestion] not_found_count: int = Field( diff --git a/src/api/endpoints/annotate/all/get/queries/agency/core.py b/src/api/endpoints/annotate/all/get/queries/agency/core.py index d3502b96..d9a86717 100644 --- a/src/api/endpoints/annotate/all/get/queries/agency/core.py +++ b/src/api/endpoints/annotate/all/get/queries/agency/core.py @@ -1,13 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo, \ - AgencyAnnotationUserSuggestionOuterInfo, AgencyAnnotationUserSuggestion, AgencyAnnotationAutoSuggestion -from src.api.endpoints.annotate.all.get.queries.agency.requester import GetAgencySuggestionsRequester -from src.db.queries.base.builder import QueryBuilderBase -from sqlalchemy.ext.asyncio import AsyncSession - -from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo, \ - AgencyAnnotationUserSuggestionOuterInfo, AgencyAnnotationUserSuggestion, AgencyAnnotationAutoSuggestion +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationSuggestion from src.api.endpoints.annotate.all.get.queries.agency.requester import GetAgencySuggestionsRequester from src.db.queries.base.builder import QueryBuilderBase @@ -30,19 +24,13 @@ async def run(self, session: AsyncSession) -> AgencyAnnotationResponseOuterInfo: location_id=self.location_id ) - # TODO: Pull both in single query - user_suggestions: list[AgencyAnnotationUserSuggestion] = \ - await requester.get_user_agency_suggestions() - auto_suggestions: list[AgencyAnnotationAutoSuggestion] = \ - await requester.get_auto_agency_suggestions() + suggestions: list[AgencyAnnotationSuggestion] = \ + await requester.get_agency_suggestions() not_found_count: int = \ await requester.get_not_found_count() return AgencyAnnotationResponseOuterInfo( - user=AgencyAnnotationUserSuggestionOuterInfo( - suggestions=user_suggestions, - not_found_count=not_found_count - ), - auto=auto_suggestions, + suggestions=suggestions, + not_found_count=not_found_count ) diff --git a/src/api/endpoints/annotate/all/get/queries/agency/requester.py b/src/api/endpoints/annotate/all/get/queries/agency/requester.py index e6ffb817..28923cf2 100644 --- a/src/api/endpoints/annotate/all/get/queries/agency/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/agency/requester.py @@ -1,16 +1,15 @@ from typing import Sequence -from sqlalchemy import func, select, RowMapping +from sqlalchemy import func, select, RowMapping, or_, and_ from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationAutoSuggestion, \ - AgencyAnnotationUserSuggestion -from src.api.endpoints.annotate.all.get.queries.agency.suggestions_with_highest_confidence import \ - SuggestionsWithHighestConfidenceCTE +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationSuggestion +from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.templates.requester import RequesterBase @@ -27,102 +26,97 @@ def __init__( self.url_id = url_id self.location_id = location_id - async def get_user_agency_suggestions(self) -> list[AgencyAnnotationUserSuggestion]: - query = ( + async def get_agency_suggestions(self) -> list[AgencyAnnotationSuggestion]: + # All agencies with either a user or robo annotation + valid_agencies_cte = ( select( - UserURLAgencySuggestion.agency_id, - func.count(UserURLAgencySuggestion.user_id).label("count"), - Agency.name.label("agency_name"), - ) - .join( - Agency, - Agency.id == UserURLAgencySuggestion.agency_id + Agency.id, ) - - ) - - if self.location_id is not None: - query = ( - query.join( - LinkAgencyLocation, - LinkAgencyLocation.agency_id == UserURLAgencySuggestion.agency_id - ) - .where( - LinkAgencyLocation.location_id == self.location_id + .where( + or_( + exists_url( + UserURLAgencySuggestion + ), + exists_url( + URLAutoAgencyIDSubtask + ) ) ) + .cte("valid_agencies") + ) - query = ( - query.where( - UserURLAgencySuggestion.url_id == self.url_id + # Number of users who suggested each agency + user_suggestions_cte = ( + select( + UserURLAgencySuggestion.url_id, + UserURLAgencySuggestion.agency_id, + func.count(UserURLAgencySuggestion.user_id).label('user_count') ) .group_by( UserURLAgencySuggestion.agency_id, - Agency.name + UserURLAgencySuggestion.url_id, ) - .order_by( - func.count(UserURLAgencySuggestion.user_id).desc() - ) - .limit(3) + .cte("user_suggestions") ) - results: Sequence[RowMapping] = await sh.mappings(self.session, query=query) - - return [ - AgencyAnnotationUserSuggestion( - agency_id=autosuggestion["agency_id"], - user_count=autosuggestion["count"], - agency_name=autosuggestion["agency_name"], + # Maximum confidence of robo annotation, if any + robo_suggestions_cte = ( + select( + URLAutoAgencyIDSubtask.url_id, + Agency.id.label("agency_id"), + func.max(AgencyIDSubtaskSuggestion.confidence).label('robo_confidence') ) - for autosuggestion in results - ] - - - async def get_auto_agency_suggestions(self) -> list[AgencyAnnotationAutoSuggestion]: - cte = SuggestionsWithHighestConfidenceCTE() - query = ( + .join( + AgencyIDSubtaskSuggestion, + AgencyIDSubtaskSuggestion.subtask_id == URLAutoAgencyIDSubtask.id + ) + .join( + Agency, + Agency.id == AgencyIDSubtaskSuggestion.agency_id + ) + .group_by( + URLAutoAgencyIDSubtask.url_id, + Agency.id + ) + .cte("robo_suggestions") + ) + # Join user and robo suggestions + joined_suggestions_query = ( select( - cte.agency_id, - cte.confidence, + valid_agencies_cte.c.id.label("agency_id"), Agency.name.label("agency_name"), + func.coalesce(user_suggestions_cte.c.user_count, 0).label('user_count'), + func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label('robo_confidence'), ) .join( Agency, - Agency.id == cte.agency_id + Agency.id == valid_agencies_cte.c.id ) - ) - - if self.location_id is not None: - query = ( - query.join( - LinkAgencyLocation, - LinkAgencyLocation.agency_id == cte.agency_id - ) - .where( - LinkAgencyLocation.location_id == self.location_id + .outerjoin( + user_suggestions_cte, + and_( + user_suggestions_cte.c.url_id == self.url_id, + user_suggestions_cte.c.agency_id == Agency.id ) ) - - query = ( - query.where( - cte.url_id == self.url_id - ) - .order_by( - cte.confidence.desc() + .outerjoin( + robo_suggestions_cte, + and_( + robo_suggestions_cte.c.url_id == self.url_id, + robo_suggestions_cte.c.agency_id == Agency.id + ) ) - .limit(3) ) - results: Sequence[RowMapping] = await sh.mappings(self.session, query=query) - - return [ - AgencyAnnotationAutoSuggestion( - agency_id=autosuggestion["agency_id"], - confidence=autosuggestion["confidence"], - agency_name=autosuggestion["agency_name"], + # Return suggestions + mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query) + suggestions: list[AgencyAnnotationSuggestion] = [ + AgencyAnnotationSuggestion( + **mapping ) - for autosuggestion in results + for mapping in mappings ] + return suggestions async def get_not_found_count(self) -> int: query = ( diff --git a/src/api/endpoints/annotate/all/get/queries/location_/core.py b/src/api/endpoints/annotate/all/get/queries/location_/core.py index 3ef0fb99..e1909b77 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/core.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/core.py @@ -1,13 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo, \ - LocationAnnotationUserSuggestion, LocationAnnotationAutoSuggestion, LocationAnnotationUserSuggestionOuterInfo -from src.api.endpoints.annotate.all.get.queries.location_.requester import GetLocationSuggestionsRequester -from src.db.queries.base.builder import QueryBuilderBase -from sqlalchemy.ext.asyncio import AsyncSession - -from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo, \ - LocationAnnotationUserSuggestion, LocationAnnotationAutoSuggestion +from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo +from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationSuggestion from src.api.endpoints.annotate.all.get.queries.location_.requester import GetLocationSuggestionsRequester from src.db.queries.base.builder import QueryBuilderBase @@ -21,24 +15,17 @@ def __init__( super().__init__() self.url_id = url_id - + # TODO: Test async def run(self, session: AsyncSession) -> LocationAnnotationResponseOuterInfo: requester = GetLocationSuggestionsRequester(session) - # TODO: Pull both in single query - suggestions - user_suggestions: list[LocationAnnotationUserSuggestion] = \ - await requester.get_user_location_suggestions(self.url_id) - auto_suggestions: list[LocationAnnotationAutoSuggestion] = \ - await requester.get_auto_location_suggestions(self.url_id) + suggestions: list[LocationAnnotationSuggestion] = \ + await requester.get_location_suggestions(self.url_id) not_found_count: int = \ await requester.get_not_found_count(self.url_id) return LocationAnnotationResponseOuterInfo( - user=LocationAnnotationUserSuggestionOuterInfo( - suggestions=user_suggestions, - not_found_count=not_found_count - ), - auto=auto_suggestions + suggestions=suggestions, + not_found_count=not_found_count ) diff --git a/src/api/endpoints/annotate/all/get/queries/location_/requester.py b/src/api/endpoints/annotate/all/get/queries/location_/requester.py index abae28ee..26175322 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/requester.py @@ -2,8 +2,7 @@ from sqlalchemy import select, func, RowMapping, or_, and_ -from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion, \ - LocationAnnotationAutoSuggestion, LocationAnnotationSuggestion +from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationSuggestion from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound @@ -38,13 +37,9 @@ async def get_location_suggestions(self, url_id: int) -> list[LocationAnnotation user_suggestions_cte = ( select( UserLocationSuggestion.url_id, - LocationExpandedView.id, + UserLocationSuggestion.location_id, func.count(UserLocationSuggestion.user_id).label('user_count') ) - .outerjoin( - LocationExpandedView, - LocationExpandedView.id == UserLocationSuggestion.location_id - ) .group_by( UserLocationSuggestion.location_id, UserLocationSuggestion.url_id, @@ -55,10 +50,10 @@ async def get_location_suggestions(self, url_id: int) -> list[LocationAnnotation robo_suggestions_cte = ( select( AutoLocationIDSubtask.url_id, - LocationExpandedView.id, + LocationExpandedView.id.label("location_id"), func.max(LocationIDSubtaskSuggestion.confidence).label('robo_confidence') ) - .outerjoin( + .join( LocationExpandedView, LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id ) @@ -77,8 +72,8 @@ async def get_location_suggestions(self, url_id: int) -> list[LocationAnnotation select( valid_locations_cte.c.id.label("location_id"), LocationExpandedView.full_display_name.label("location_name"), - user_suggestions_cte.c.user_count, - robo_suggestions_cte.c.robo_confidence, + func.coalesce(user_suggestions_cte.c.user_count, 0).label("user_count"), + func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label("robo_confidence"), ) .join( LocationExpandedView, @@ -109,81 +104,6 @@ async def get_location_suggestions(self, url_id: int) -> list[LocationAnnotation ] return suggestions - async def get_location_not_found_suggestions(self, url_id: int ) -> int: - query = ( - select( - func.count(LinkUserSuggestionLocationNotFound.user_id) - ) - .where( - LinkUserSuggestionLocationNotFound.url_id == url_id - ) - ) - return await self.scalar(query) - - async def get_user_location_suggestions(self, url_id: int) -> list[LocationAnnotationUserSuggestion]: - query = ( - select( - UserLocationSuggestion.location_id, - LocationExpandedView.full_display_name.label("location_name"), - func.count(UserLocationSuggestion.user_id).label('user_count') - ) - .join( - LocationExpandedView, - LocationExpandedView.id == UserLocationSuggestion.location_id - ) - .where( - UserLocationSuggestion.url_id == url_id - ) - .group_by( - UserLocationSuggestion.location_id, - LocationExpandedView.full_display_name - ) - .order_by( - func.count(UserLocationSuggestion.user_id).desc() - ) - ) - raw_results: Sequence[RowMapping] = await sh.mappings(self.session, query) - return [ - LocationAnnotationUserSuggestion( - **raw_result - ) - for raw_result in raw_results - ] - - - - async def get_auto_location_suggestions( - self, - url_id: int - ) -> list[LocationAnnotationAutoSuggestion]: - query = ( - select( - LocationExpandedView.full_display_name.label("location_name"), - LocationIDSubtaskSuggestion.location_id, - LocationIDSubtaskSuggestion.confidence, - ) - .join( - LocationExpandedView, - LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id - ) - .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id - ) - .where( - AutoLocationIDSubtask.url_id == url_id - ) - .order_by( - LocationIDSubtaskSuggestion.confidence.desc() - ) - ) - raw_results: Sequence[RowMapping] = await sh.mappings(self.session, query) - return [ - LocationAnnotationAutoSuggestion( - **raw_result - ) - for raw_result in raw_results - ] async def get_not_found_count(self, url_id: int) -> int: query = ( diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py b/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py index b6b2cc01..3f8b8186 100644 --- a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py @@ -1,5 +1,4 @@ import sqlalchemy as sa -from sqlalchemy import PrimaryKeyConstraint from sqlalchemy.orm import relationship from src.db.models.mixins import CreatedAtMixin, AgencyDependentMixin diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index 007e87f7..1505d0b7 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -1,6 +1,6 @@ import pytest -from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion +from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationSuggestion from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.get.queries.core import GetNextURLForAllAnnotationQueryBuilder from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo @@ -140,20 +140,27 @@ async def test_annotate_all( user_id=99, ) ) - user_suggestions: list[LocationAnnotationUserSuggestion] = \ - response.next_annotation.location_suggestions.user.suggestions - assert len(user_suggestions) == 2 + suggestions: list[LocationAnnotationSuggestion] = response.next_annotation.location_suggestions.suggestions + assert len(suggestions) == 2 - response_location_ids: list[int] = [location_suggestion.location_id for location_suggestion in user_suggestions] - assert set(response_location_ids) == {california.location_id, pennsylvania.location_id} + response_location_ids: list[int] = [ + location_suggestion.location_id + for location_suggestion in suggestions] - response_location_names: list[str] = [location_suggestion.location_name for location_suggestion in user_suggestions] + assert set(response_location_ids) == { + california.location_id, + pennsylvania.location_id + } + + response_location_names: list[str] = [ + location_suggestion.location_name + for location_suggestion in suggestions] assert set(response_location_names) == { "California", "Pennsylvania" } - for user_suggestion in user_suggestions: + for user_suggestion in suggestions: assert user_suggestion.user_count == 1 # Confirm 3 name suggestions