diff --git a/ENV.md b/ENV.md index 525fb3f4..d969358a 100644 --- a/ENV.md +++ b/ENV.md @@ -72,20 +72,21 @@ Note that some tasks/subtasks are themselves enabled by other tasks. URL Task Flags are collectively controlled by the `RUN_URL_TASKS_TASK_FLAG` flag. -| Flag | Description | -|-------------------------------------|--------------------------------------------------------------------| -| `URL_HTML_TASK_FLAG` | URL HTML scraping task. | -| `URL_RECORD_TYPE_TASK_FLAG` | Automatically assigns Record Types to URLs. | -| `URL_AGENCY_IDENTIFICATION_TASK_FLAG` | Automatically assigns and suggests Agencies for URLs. | -| `URL_SUBMIT_APPROVED_TASK_FLAG` | Submits approved URLs to the Data Sources App. | -| `URL_MISC_METADATA_TASK_FLAG` | Adds misc metadata to URLs. | -| `URL_404_PROBE_TASK_FLAG` | Probes URLs for 404 errors. | -| `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs. | -| `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. | -| `URL_ROOT_URL_TASK_FLAG` | Extracts and links Root URLs to URLs. | -| `URL_SCREENSHOT_TASK_FLAG` | Takes screenshots of URLs. | -| `URL_AUTO_VALIDATE_TASK_FLAG` | Automatically validates URLs. | -| `URL_AUTO_NAME_TASK_FLAG` | Automatically names URLs. | +| Flag | Description | +|-------------------------------------|-------------------------------------------------------| +| `URL_HTML_TASK_FLAG` | URL HTML scraping task. | +| `URL_RECORD_TYPE_TASK_FLAG` | Automatically assigns Record Types to URLs. | +| `URL_AGENCY_IDENTIFICATION_TASK_FLAG` | Automatically assigns and suggests Agencies for URLs. | +| `URL_SUBMIT_APPROVED_TASK_FLAG` | Submits approved URLs to the Data Sources App. | +| `URL_MISC_METADATA_TASK_FLAG` | Adds misc metadata to URLs. | +| `URL_404_PROBE_TASK_FLAG` | Probes URLs for 404 errors. | +| `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs. | +| `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. | +| `URL_ROOT_URL_TASK_FLAG` | Extracts and links Root URLs to URLs. 
| +| `URL_SCREENSHOT_TASK_FLAG` | Takes screenshots of URLs. | +| `URL_AUTO_VALIDATE_TASK_FLAG` | Automatically validates URLs. | +| `URL_AUTO_NAME_TASK_FLAG` | Automatically names URLs. | +| `URL_SUSPEND_TASK_FLAG` | Suspends URLs meeting suspension criteria. | ### Agency ID Subtasks diff --git a/alembic/versions/2025_09_29_1246-5be534715a01_add_agency_location_not_found_logic.py b/alembic/versions/2025_09_29_1246-5be534715a01_add_agency_location_not_found_logic.py new file mode 100644 index 00000000..171adcbe --- /dev/null +++ b/alembic/versions/2025_09_29_1246-5be534715a01_add_agency_location_not_found_logic.py @@ -0,0 +1,74 @@ +"""Add Agency/Location Not Found Logic + +Revision ID: 5be534715a01 +Revises: 50a710e413f8 +Create Date: 2025-09-29 12:46:27.140173 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import created_at_column, url_id_column, user_id_column + +# revision identifiers, used by Alembic. +revision: str = '5be534715a01' +down_revision: Union[str, None] = '50a710e413f8' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +def upgrade() -> None: + add_link_user_suggestion_agency_not_found_table() + add_link_user_suggestion_location_not_found_table() + add_flag_url_suspended_table() + add_url_suspend_task_type() + remove_link_url_new_agency_suggestion_table() + remove_new_agency_suggestions_table() + +def add_url_suspend_task_type(): + op.execute( + """ + ALTER TYPE task_type ADD VALUE 'Suspend URLs'; + """ + ) + +def add_link_user_suggestion_agency_not_found_table(): + op.create_table( + "link_user_suggestion_agency_not_found", + user_id_column(), + url_id_column(), + created_at_column(), + sa.PrimaryKeyConstraint("user_id", "url_id"), + ) + + +def add_link_user_suggestion_location_not_found_table(): + op.create_table( + "link_user_suggestion_location_not_found", + user_id_column(), + url_id_column(), + 
created_at_column(), + sa.PrimaryKeyConstraint("user_id", "url_id"), + ) + + +def add_flag_url_suspended_table(): + op.create_table( + "flag_url_suspended", + url_id_column(), + created_at_column(), + sa.PrimaryKeyConstraint("url_id"), + ) + + +def remove_link_url_new_agency_suggestion_table(): + op.drop_table("link_url_new_agency_suggestion") + + +def remove_new_agency_suggestions_table(): + op.drop_table("new_agency_suggestions") + + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/core.py b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/core.py deleted file mode 100644 index a9a33e84..00000000 --- a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/core.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Sequence - -from sqlalchemy import select, RowMapping -from sqlalchemy.ext.asyncio import AsyncSession - -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo -from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.suggestions_with_highest_confidence import \ - SuggestionsWithHighestConfidenceCTE -from src.core.enums import SuggestionType -from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.queries.base.builder import QueryBuilderBase - -from src.db.helpers.session import session_helper as sh - -class GetAgencySuggestionsQueryBuilder(QueryBuilderBase): - - def __init__( - self, - url_id: int - ): - super().__init__() - self.url_id = url_id - - async def run(self, session: AsyncSession) -> list[GetNextURLForAgencyAgencyInfo]: - # Get relevant autosuggestions and agency info, if an associated agency exists - - cte = SuggestionsWithHighestConfidenceCTE() - - query = ( - select( - cte.agency_id, - cte.confidence, - Agency.name, - Agency.state, - Agency.county, - Agency.locality - ) - .outerjoin( - Agency, - Agency.agency_id == cte.agency_id - ) - .where( - cte.url_id == self.url_id - ) - ) - - 
raw_autosuggestions: Sequence[RowMapping] = await sh.mappings(session, query=query) - if len(raw_autosuggestions) == 0: - # Unknown agency - return [ - GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.UNKNOWN, - ) - ] - - agency_suggestions: list[GetNextURLForAgencyAgencyInfo] = [] - for autosuggestion in raw_autosuggestions: - agency_id: int = autosuggestion["agency_id"] - name: str = autosuggestion["name"] - state: str | None = autosuggestion["state"] - county: str | None = autosuggestion["county"] - locality: str | None = autosuggestion["locality"] - agency_suggestions.append( - GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.AUTO_SUGGESTION, - pdap_agency_id=agency_id, - agency_name=name, - state=state, - county=county, - locality=locality - ) - ) - return agency_suggestions \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/models/agency.py b/src/api/endpoints/annotate/all/get/models/agency.py new file mode 100644 index 00000000..45806d98 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/models/agency.py @@ -0,0 +1,27 @@ +from pydantic import BaseModel, Field + + +class AgencyAnnotationAutoSuggestion(BaseModel): + agency_id: int + agency_name: str + confidence: int = Field( + title="The confidence of the agency", + ge=0, + le=100, + ) + +class AgencyAnnotationUserSuggestion(BaseModel): + agency_id: int + agency_name: str + user_count: int + +class AgencyAnnotationUserSuggestionOuterInfo(BaseModel): + suggestions: list[AgencyAnnotationUserSuggestion] + not_found_count: int = Field( + title="How many users listed the agency as not found.", + ge=0, + ) + +class AgencyAnnotationResponseOuterInfo(BaseModel): + user: AgencyAnnotationUserSuggestionOuterInfo + auto: list[AgencyAnnotationAutoSuggestion] \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/models/location.py b/src/api/endpoints/annotate/all/get/models/location.py index b2d730c4..fb467004 100644 --- 
a/src/api/endpoints/annotate/all/get/models/location.py +++ b/src/api/endpoints/annotate/all/get/models/location.py @@ -23,7 +23,13 @@ class LocationAnnotationUserSuggestion(BaseModel): ge=1, ) +class LocationAnnotationUserSuggestionOuterInfo(BaseModel): + suggestions: list[LocationAnnotationUserSuggestion] + not_found_count: int = Field( + title="How many users listed the location as not found.", + ge=0, + ) class LocationAnnotationResponseOuterInfo(BaseModel): - user: list[LocationAnnotationUserSuggestion] + user: LocationAnnotationUserSuggestionOuterInfo auto: list[LocationAnnotationAutoSuggestion] \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/models/response.py b/src/api/endpoints/annotate/all/get/models/response.py index 3f280465..989dbf8d 100644 --- a/src/api/endpoints/annotate/all/get/models/response.py +++ b/src/api/endpoints/annotate/all/get/models/response.py @@ -3,6 +3,7 @@ from pydantic import Field, BaseModel from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationSuggestion @@ -13,7 +14,7 @@ class GetNextURLForAllAnnotationInnerResponse(AnnotationInnerResponseInfoBase): - agency_suggestions: list[GetNextURLForAgencyAgencyInfo] | None = Field( + agency_suggestions: AgencyAnnotationResponseOuterInfo | None = Field( title="The auto-labeler's suggestions for agencies" ) location_suggestions: LocationAnnotationResponseOuterInfo | None = Field( diff --git a/src/api/endpoints/annotate/agency/get/queries/__init__.py b/src/api/endpoints/annotate/all/get/queries/agency/__init__.py similarity index 100% rename from 
src/api/endpoints/annotate/agency/get/queries/__init__.py rename to src/api/endpoints/annotate/all/get/queries/agency/__init__.py diff --git a/src/api/endpoints/annotate/all/get/queries/agency/core.py b/src/api/endpoints/annotate/all/get/queries/agency/core.py new file mode 100644 index 00000000..236aae88 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/agency/core.py @@ -0,0 +1,44 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo, \ + AgencyAnnotationUserSuggestionOuterInfo, AgencyAnnotationUserSuggestion, AgencyAnnotationAutoSuggestion +from src.api.endpoints.annotate.all.get.queries.agency.requester import GetAgencySuggestionsRequester +from src.db.queries.base.builder import QueryBuilderBase +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo, \ + AgencyAnnotationUserSuggestionOuterInfo, AgencyAnnotationUserSuggestion, AgencyAnnotationAutoSuggestion +from src.api.endpoints.annotate.all.get.queries.agency.requester import GetAgencySuggestionsRequester +from src.db.queries.base.builder import QueryBuilderBase + + +class GetAgencySuggestionsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + url_id: int + ): + super().__init__() + self.url_id = url_id + + async def run(self, session: AsyncSession) -> AgencyAnnotationResponseOuterInfo: + requester = GetAgencySuggestionsRequester( + session, + url_id=self.url_id + ) + + user_suggestions: list[AgencyAnnotationUserSuggestion] = \ + await requester.get_user_agency_suggestions() + auto_suggestions: list[AgencyAnnotationAutoSuggestion] = \ + await requester.get_auto_agency_suggestions() + not_found_count: int = \ + await requester.get_not_found_count() + return AgencyAnnotationResponseOuterInfo( + user=AgencyAnnotationUserSuggestionOuterInfo( + suggestions=user_suggestions, + not_found_count=not_found_count 
+ ), + auto=auto_suggestions, + ) + + diff --git a/src/api/endpoints/annotate/all/get/queries/agency/requester.py b/src/api/endpoints/annotate/all/get/queries/agency/requester.py new file mode 100644 index 00000000..bec13508 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/agency/requester.py @@ -0,0 +1,101 @@ +from typing import Sequence + +from sqlalchemy import func, select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationAutoSuggestion, \ + AgencyAnnotationUserSuggestion +from src.api.endpoints.annotate.all.get.queries.agency.suggestions_with_highest_confidence import \ + SuggestionsWithHighestConfidenceCTE +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.templates.requester import RequesterBase + + +class GetAgencySuggestionsRequester(RequesterBase): + + def __init__(self, session: AsyncSession, url_id: int): + super().__init__(session) + self.url_id = url_id + + async def get_user_agency_suggestions(self) -> list[AgencyAnnotationUserSuggestion]: + query = ( + select( + UserUrlAgencySuggestion.agency_id, + func.count(UserUrlAgencySuggestion.user_id).label("count"), + Agency.name.label("agency_name"), + ) + .join( + Agency, + Agency.agency_id == UserUrlAgencySuggestion.agency_id + ) + .where( + UserUrlAgencySuggestion.url_id == self.url_id + ) + .group_by( + UserUrlAgencySuggestion.agency_id, + Agency.name + ) + .order_by( + func.count(UserUrlAgencySuggestion.user_id).desc() + ) + .limit(3) + ) + + results: Sequence[RowMapping] = await sh.mappings(self.session, query=query) + + return [ + AgencyAnnotationUserSuggestion( + agency_id=autosuggestion["agency_id"], + 
user_count=autosuggestion["count"], + agency_name=autosuggestion["agency_name"], + ) + for autosuggestion in results + ] + + + async def get_auto_agency_suggestions(self) -> list[AgencyAnnotationAutoSuggestion]: + cte = SuggestionsWithHighestConfidenceCTE() + query = ( + select( + cte.agency_id, + cte.confidence, + Agency.name.label("agency_name"), + ) + .outerjoin( + Agency, + Agency.agency_id == cte.agency_id + ) + .where( + cte.url_id == self.url_id + ) + .order_by( + cte.confidence.desc() + ) + .limit(3) + ) + + results: Sequence[RowMapping] = await sh.mappings(self.session, query=query) + + return [ + AgencyAnnotationAutoSuggestion( + agency_id=autosuggestion["agency_id"], + confidence=autosuggestion["confidence"], + agency_name=autosuggestion["agency_name"], + ) + for autosuggestion in results + ] + + async def get_not_found_count(self) -> int: + query = ( + select( + func.count(LinkUserSuggestionAgencyNotFound.user_id) + ) + .where( + LinkUserSuggestionAgencyNotFound.url_id == self.url_id + ) + ) + + return await sh.scalar(self.session, query=query) \ No newline at end of file diff --git a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/suggestions_with_highest_confidence.py b/src/api/endpoints/annotate/all/get/queries/agency/suggestions_with_highest_confidence.py similarity index 100% rename from src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/suggestions_with_highest_confidence.py rename to src/api/endpoints/annotate/all/get/queries/agency/suggestions_with_highest_confidence.py diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index cad49b90..fccf4f84 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -3,30 +3,28 @@ from sqlalchemy.orm import joinedload from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder -from 
src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo -from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.core import GetAgencySuggestionsQueryBuilder +from src.api.endpoints.annotate.all.get.models.agency import AgencyAnnotationResponseOuterInfo from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationSuggestion from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse, \ GetNextURLForAllAnnotationInnerResponse from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion +from src.api.endpoints.annotate.all.get.queries.agency.core import GetAgencySuggestionsQueryBuilder from src.api.endpoints.annotate.all.get.queries.convert import \ convert_user_url_type_suggestion_to_url_type_annotation_suggestion, \ convert_user_record_type_suggestion_to_record_type_annotation_suggestion from src.api.endpoints.annotate.all.get.queries.location_.core import GetLocationSuggestionsQueryBuilder from src.api.endpoints.annotate.all.get.queries.name.core import GetNameSuggestionsQueryBuilder -from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo from src.collectors.enums import URLStatus from src.db.dto_converter import DTOConverter from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.record_type.auto import 
AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.models.views.unvalidated_url import UnvalidatedURL from src.db.models.views.url_anno_count import URLAnnotationCount @@ -103,6 +101,14 @@ async def run( UserRecordTypeSuggestion.url_id == URL.id, UserRecordTypeSuggestion.user_id == self.user_id, ) + ), + ~exists( + select( + FlagURLSuspended.url_id + ) + .where( + FlagURLSuspended.url_id == URL.id, + ) ) ) ) @@ -137,7 +143,7 @@ async def run( convert_user_record_type_suggestion_to_record_type_annotation_suggestion( url.user_record_type_suggestions ) - agency_suggestions: list[GetNextURLForAgencyAgencyInfo] = \ + agency_suggestions: AgencyAnnotationResponseOuterInfo = \ await GetAgencySuggestionsQueryBuilder(url_id=url.id).run(session) location_suggestions: LocationAnnotationResponseOuterInfo = \ await GetLocationSuggestionsQueryBuilder(url_id=url.id).run(session) diff --git a/src/api/endpoints/annotate/all/get/queries/location_/convert.py b/src/api/endpoints/annotate/all/get/queries/location_/convert.py deleted file mode 100644 index 6ed89186..00000000 --- a/src/api/endpoints/annotate/all/get/queries/location_/convert.py +++ /dev/null @@ -1,81 +0,0 @@ -from typing import Sequence - -from sqlalchemy import select, func, RowMapping - -from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion, \ - LocationAnnotationAutoSuggestion -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.views.location_expanded import 
LocationExpandedView -from src.db.templates.requester import RequesterBase - -from src.db.helpers.session import session_helper as sh - -class GetLocationSuggestionsRequester(RequesterBase): - - - async def get_user_location_suggestions(self, url_id: int) -> list[LocationAnnotationUserSuggestion]: - query = ( - select( - UserLocationSuggestion.location_id, - LocationExpandedView.display_name.label("location_name"), - func.count(UserLocationSuggestion.user_id).label('user_count') - ) - .join( - LocationExpandedView, - LocationExpandedView.id == UserLocationSuggestion.location_id - ) - .where( - UserLocationSuggestion.url_id == url_id - ) - .group_by( - UserLocationSuggestion.location_id, - LocationExpandedView.display_name - ) - .order_by( - func.count(UserLocationSuggestion.user_id).desc() - ) - ) - raw_results: Sequence[RowMapping] = await sh.mappings(self.session, query) - return [ - LocationAnnotationUserSuggestion( - **raw_result - ) - for raw_result in raw_results - ] - - - - async def get_auto_location_suggestions( - self, - url_id: int - ) -> list[LocationAnnotationAutoSuggestion]: - query = ( - select( - LocationExpandedView.display_name.label("location_name"), - LocationIDSubtaskSuggestion.location_id, - LocationIDSubtaskSuggestion.confidence, - ) - .join( - LocationExpandedView, - LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id - ) - .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id - ) - .where( - AutoLocationIDSubtask.url_id == url_id - ) - .order_by( - LocationIDSubtaskSuggestion.confidence.desc() - ) - ) - raw_results: Sequence[RowMapping] = await sh.mappings(self.session, query) - return [ - LocationAnnotationAutoSuggestion( - **raw_result - ) - for raw_result in raw_results - ] diff --git a/src/api/endpoints/annotate/all/get/queries/location_/core.py b/src/api/endpoints/annotate/all/get/queries/location_/core.py index cee9f758..85db523c 100644 --- 
a/src/api/endpoints/annotate/all/get/queries/location_/core.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/core.py @@ -1,14 +1,14 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo, \ - LocationAnnotationUserSuggestion, LocationAnnotationAutoSuggestion -from src.api.endpoints.annotate.all.get.queries.location_.convert import GetLocationSuggestionsRequester + LocationAnnotationUserSuggestion, LocationAnnotationAutoSuggestion, LocationAnnotationUserSuggestionOuterInfo +from src.api.endpoints.annotate.all.get.queries.location_.requester import GetLocationSuggestionsRequester from src.db.queries.base.builder import QueryBuilderBase from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo, \ LocationAnnotationUserSuggestion, LocationAnnotationAutoSuggestion -from src.api.endpoints.annotate.all.get.queries.location_.convert import GetLocationSuggestionsRequester +from src.api.endpoints.annotate.all.get.queries.location_.requester import GetLocationSuggestionsRequester from src.db.queries.base.builder import QueryBuilderBase @@ -28,9 +28,14 @@ async def run(self, session: AsyncSession) -> LocationAnnotationResponseOuterInf await requester.get_user_location_suggestions(self.url_id) auto_suggestions: list[LocationAnnotationAutoSuggestion] = \ await requester.get_auto_location_suggestions(self.url_id) + not_found_count: int = \ + await requester.get_not_found_count(self.url_id) return LocationAnnotationResponseOuterInfo( - user=user_suggestions, + user=LocationAnnotationUserSuggestionOuterInfo( + suggestions=user_suggestions, + not_found_count=not_found_count + ), auto=auto_suggestions ) diff --git a/src/api/endpoints/annotate/all/get/queries/location_/requester.py b/src/api/endpoints/annotate/all/get/queries/location_/requester.py index e69de29b..c635c5d4 100644 --- 
a/src/api/endpoints/annotate/all/get/queries/location_/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/requester.py @@ -0,0 +1,94 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping + +from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion, \ + LocationAnnotationAutoSuggestion +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask +from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion +from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion +from src.db.models.views.location_expanded import LocationExpandedView +from src.db.templates.requester import RequesterBase + +from src.db.helpers.session import session_helper as sh + +class GetLocationSuggestionsRequester(RequesterBase): + + + async def get_user_location_suggestions(self, url_id: int) -> list[LocationAnnotationUserSuggestion]: + query = ( + select( + UserLocationSuggestion.location_id, + LocationExpandedView.display_name.label("location_name"), + func.count(UserLocationSuggestion.user_id).label('user_count') + ) + .join( + LocationExpandedView, + LocationExpandedView.id == UserLocationSuggestion.location_id + ) + .where( + UserLocationSuggestion.url_id == url_id + ) + .group_by( + UserLocationSuggestion.location_id, + LocationExpandedView.display_name + ) + .order_by( + func.count(UserLocationSuggestion.user_id).desc() + ) + ) + raw_results: Sequence[RowMapping] = await sh.mappings(self.session, query) + return [ + LocationAnnotationUserSuggestion( + **raw_result + ) + for raw_result in raw_results + ] + + + + async def get_auto_location_suggestions( + self, + url_id: int + ) -> list[LocationAnnotationAutoSuggestion]: + query = ( + select( + 
LocationExpandedView.display_name.label("location_name"), + LocationIDSubtaskSuggestion.location_id, + LocationIDSubtaskSuggestion.confidence, + ) + .join( + LocationExpandedView, + LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id + ) + .join( + AutoLocationIDSubtask, + AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id + ) + .where( + AutoLocationIDSubtask.url_id == url_id + ) + .order_by( + LocationIDSubtaskSuggestion.confidence.desc() + ) + ) + raw_results: Sequence[RowMapping] = await sh.mappings(self.session, query) + return [ + LocationAnnotationAutoSuggestion( + **raw_result + ) + for raw_result in raw_results + ] + + async def get_not_found_count(self, url_id: int) -> int: + query = ( + select( + func.count(LinkUserSuggestionLocationNotFound.user_id) + ) + .where( + LinkUserSuggestionLocationNotFound.url_id == url_id + ) + ) + + return await sh.scalar(self.session, query=query) \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/post/models/agency.py b/src/api/endpoints/annotate/all/post/models/agency.py index 55c52e49..97574e86 100644 --- a/src/api/endpoints/annotate/all/post/models/agency.py +++ b/src/api/endpoints/annotate/all/post/models/agency.py @@ -1,18 +1,16 @@ -from pydantic import BaseModel +from pydantic import BaseModel, model_validator -from src.db.models.impl.agency.enums import JurisdictionType, AgencyType - - -class AnnotationNewAgencySuggestionInfo(BaseModel): - name: str - location_id: int - jurisdiction_type: JurisdictionType | None - agency_type: AgencyType | None class AnnotationPostAgencyInfo(BaseModel): - new_agency_suggestion: AnnotationNewAgencySuggestionInfo | None = None + not_found: bool = False agency_ids: list[int] = [] @property def empty(self) -> bool: - return self.new_agency_suggestion is None and len(self.agency_ids) == 0 + return len(self.agency_ids) == 0 and not self.not_found + + @model_validator(mode="after") + def forbid_not_found_if_agency_ids(self): + if self.not_found and 
len(self.agency_ids) > 0: + raise ValueError("not_found must be False if agency_ids is not empty") + return self diff --git a/src/api/endpoints/annotate/all/post/models/location.py b/src/api/endpoints/annotate/all/post/models/location.py new file mode 100644 index 00000000..1eb7947d --- /dev/null +++ b/src/api/endpoints/annotate/all/post/models/location.py @@ -0,0 +1,16 @@ +from pydantic import BaseModel, model_validator + + +class AnnotationPostLocationInfo(BaseModel): + not_found: bool = False + location_ids: list[int] = [] + + @property + def empty(self) -> bool: + return len(self.location_ids) == 0 and not self.not_found + + @model_validator(mode="after") + def forbid_not_found_if_location_ids(self): + if self.not_found and len(self.location_ids) > 0: + raise ValueError("not_found must be False if location_ids is not empty") + return self \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/post/models/request.py b/src/api/endpoints/annotate/all/post/models/request.py index 240c8389..9ff40f40 100644 --- a/src/api/endpoints/annotate/all/post/models/request.py +++ b/src/api/endpoints/annotate/all/post/models/request.py @@ -1,6 +1,7 @@ from pydantic import BaseModel, model_validator, ConfigDict from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo +from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.core.enums import RecordType from src.core.exceptions import FailedValidationException @@ -13,7 +14,7 @@ class AllAnnotationPostInfo(BaseModel): suggested_status: URLType record_type: RecordType | None = None agency_info: AnnotationPostAgencyInfo - location_ids: list[int] + location_info: AnnotationPostLocationInfo name_info: AnnotationPostNameInfo = AnnotationPostNameInfo() @model_validator(mode="after") @@ -35,7 +36,7 @@ def forbid_all_else_if_not_relevant(self): raise 
FailedValidationException("record_type 
must be None if suggested_status is NOT RELEVANT") if not self.agency_info.empty: raise FailedValidationException("agency_info must be empty if suggested_status is NOT RELEVANT") - if len(self.location_ids) > 0: + if not self.location_info.empty: raise FailedValidationException("location_ids must be empty if suggested_status is NOT RELEVANT") return self diff --git a/src/api/endpoints/annotate/all/post/query.py b/src/api/endpoints/annotate/all/post/query.py index 95bb9102..2cbcb420 100644 --- a/src/api/endpoints/annotate/all/post/query.py +++ b/src/api/endpoints/annotate/all/post/query.py @@ -41,14 +41,15 @@ async def run(self, session: AsyncSession) -> None: if self.post_info.suggested_status == URLType.NOT_RELEVANT: return - requester.add_location_ids(self.post_info.location_ids) + requester.add_location_ids(self.post_info.location_info.location_ids) # TODO (TEST): Add test for submitting Meta URL validation requester.optionally_add_record_type(self.post_info.record_type) requester.add_agency_ids(self.post_info.agency_info.agency_ids) - await requester.optionally_add_new_agency_suggestion( - self.post_info.agency_info.new_agency_suggestion, - url_id=self.url_id, - ) + if self.post_info.location_info.not_found: + requester.add_not_found_location() + + if self.post_info.agency_info.not_found: + requester.add_not_found_agency() diff --git a/src/api/endpoints/annotate/all/post/requester.py b/src/api/endpoints/annotate/all/post/requester.py index dc19c92d..14064e8a 100644 --- a/src/api/endpoints/annotate/all/post/requester.py +++ b/src/api/endpoints/annotate/all/post/requester.py @@ -1,12 +1,11 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.annotate.all.post.models.agency import AnnotationNewAgencySuggestionInfo from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.core.enums import RecordType -from src.db.models.impl.agency.suggestion.sqlalchemy import NewAgencySuggestion from 
src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.url_new_agency_suggestion.sqlalchemy import LinkURLNewAgencySuggestion from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource @@ -97,23 +96,16 @@ async def optionally_add_name_suggestion( ) self.session.add(link) - async def optionally_add_new_agency_suggestion( - self, - suggestion_info: AnnotationNewAgencySuggestionInfo | None, - url_id: int, - ) -> None: - if suggestion_info is None: - return - new_agency_suggestion = NewAgencySuggestion( - name=suggestion_info.name, - location_id=suggestion_info.location_id, - jurisdiction_type=suggestion_info.jurisdiction_type, - agency_type=suggestion_info.agency_type, + def add_not_found_agency(self) -> None: + not_found_agency = LinkUserSuggestionAgencyNotFound( + user_id=self.user_id, + url_id=self.url_id, ) - self.session.add(new_agency_suggestion) - await self.session.flush() - link = LinkURLNewAgencySuggestion( - url_id=url_id, - suggestion_id=new_agency_suggestion.id, + self.session.add(not_found_agency) + + def add_not_found_location(self) -> None: + not_found_location = LinkUserSuggestionLocationNotFound( + user_id=self.user_id, + url_id=self.url_id, ) - self.session.add(link) + self.session.add(not_found_location) diff --git a/src/core/tasks/url/loader.py b/src/core/tasks/url/loader.py index 41e79949..86625d94 100644 --- a/src/core/tasks/url/loader.py +++ b/src/core/tasks/url/loader.py @@ -23,6 +23,7 @@ from 
src.core.tasks.url.operators.root_url.core import URLRootURLTaskOperator from src.core.tasks.url.operators.screenshot.core import URLScreenshotTaskOperator from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator +from src.core.tasks.url.operators.suspend.core import SuspendURLTaskOperator from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.inference.client import HuggingFaceInferenceClient @@ -226,6 +227,18 @@ def _get_auto_name_task_operator(self) -> URLTaskEntry: ) ) + def _get_suspend_url_task_operator(self) -> URLTaskEntry: + operator = SuspendURLTaskOperator( + adb_client=self.adb_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_SUSPEND_TASK_FLAG", + default=True + ) + ) + async def load_entries(self) -> list[URLTaskEntry]: return [ @@ -242,4 +255,5 @@ async def load_entries(self) -> list[URLTaskEntry]: self._get_location_id_task_operator(), self._get_auto_validate_task_operator(), self._get_auto_name_task_operator(), + self._get_suspend_url_task_operator(), ] diff --git a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/__init__.py b/src/core/tasks/url/operators/suspend/__init__.py similarity index 100% rename from src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/__init__.py rename to src/core/tasks/url/operators/suspend/__init__.py diff --git a/src/core/tasks/url/operators/suspend/core.py b/src/core/tasks/url/operators/suspend/core.py new file mode 100644 index 00000000..2dcfc53b --- /dev/null +++ b/src/core/tasks/url/operators/suspend/core.py @@ -0,0 +1,30 @@ +from src.core.tasks.url.operators.base import URLTaskOperatorBase +from src.core.tasks.url.operators.suspend.queries.get.query import GetURLsForSuspensionQueryBuilder +from src.core.tasks.url.operators.suspend.queries.get.response import GetURLsForSuspensionResponse +from 
src.core.tasks.url.operators.suspend.queries.insert import InsertURLSuspensionsQueryBuilder +from src.core.tasks.url.operators.suspend.queries.prereq import GetURLsForSuspensionPrerequisitesQueryBuilder +from src.db.enums import TaskType + + +class SuspendURLTaskOperator(URLTaskOperatorBase): + + @property + def task_type(self) -> TaskType: + return TaskType.SUSPEND_URLS + + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + GetURLsForSuspensionPrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + # Get URLs for auto validation + responses: list[GetURLsForSuspensionResponse] = await self.adb_client.run_query_builder( + GetURLsForSuspensionQueryBuilder() + ) + url_ids: list[int] = [response.url_id for response in responses] + await self.link_urls_to_task(url_ids) + + await self.adb_client.run_query_builder( + InsertURLSuspensionsQueryBuilder(responses) + ) diff --git a/src/db/models/impl/link/url_new_agency_suggestion/__init__.py b/src/core/tasks/url/operators/suspend/queries/__init__.py similarity index 100% rename from src/db/models/impl/link/url_new_agency_suggestion/__init__.py rename to src/core/tasks/url/operators/suspend/queries/__init__.py diff --git a/src/core/tasks/url/operators/suspend/queries/cte.py b/src/core/tasks/url/operators/suspend/queries/cte.py new file mode 100644 index 00000000..4dfc6822 --- /dev/null +++ b/src/core/tasks/url/operators/suspend/queries/cte.py @@ -0,0 +1,48 @@ +from sqlalchemy import select, func, Select, exists, or_ + +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from src.db.models.views.unvalidated_url import UnvalidatedURL + + +class GetURLsForSuspensionCTEContainer: + + def __init__(self): 
+ self.cte = ( + select( + UnvalidatedURL.url_id + ) + .outerjoin( + LinkUserSuggestionAgencyNotFound, + UnvalidatedURL.url_id == LinkUserSuggestionAgencyNotFound.url_id + ) + .outerjoin( + LinkUserSuggestionLocationNotFound, + UnvalidatedURL.url_id == LinkUserSuggestionLocationNotFound.url_id + ) + .where( + ~exists( + select( + FlagURLSuspended.url_id + ) + .where( + FlagURLSuspended.url_id == UnvalidatedURL.url_id + ) + ) + ) + .group_by( + UnvalidatedURL.url_id + ) + .having( + or_( + func.count(LinkUserSuggestionAgencyNotFound.user_id) >= 2, + func.count(LinkUserSuggestionLocationNotFound.user_id) >= 2, + ) + ) + .cte("get_urls_for_suspension") + ) + + @property + def query(self) -> Select: + return select(self.cte.c.url_id) \ No newline at end of file diff --git a/src/core/tasks/url/operators/suspend/queries/get/__init__.py b/src/core/tasks/url/operators/suspend/queries/get/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/suspend/queries/get/query.py b/src/core/tasks/url/operators/suspend/queries/get/query.py new file mode 100644 index 00000000..23a48d5b --- /dev/null +++ b/src/core/tasks/url/operators/suspend/queries/get/query.py @@ -0,0 +1,16 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.suspend.queries.cte import GetURLsForSuspensionCTEContainer +from src.core.tasks.url.operators.suspend.queries.get.response import GetURLsForSuspensionResponse +from src.db.queries.base.builder import QueryBuilderBase +from src.db.helpers.session import session_helper as sh + +class GetURLsForSuspensionQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[GetURLsForSuspensionResponse]: + cte = GetURLsForSuspensionCTEContainer() + results = await sh.mappings(session=session, query=cte.query) + return [ + GetURLsForSuspensionResponse(url_id=result["url_id"]) + for result in results + ] diff --git 
a/src/core/tasks/url/operators/suspend/queries/get/response.py b/src/core/tasks/url/operators/suspend/queries/get/response.py new file mode 100644 index 00000000..2f207fbe --- /dev/null +++ b/src/core/tasks/url/operators/suspend/queries/get/response.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class GetURLsForSuspensionResponse(BaseModel): + url_id: int \ No newline at end of file diff --git a/src/core/tasks/url/operators/suspend/queries/insert.py b/src/core/tasks/url/operators/suspend/queries/insert.py new file mode 100644 index 00000000..e979563f --- /dev/null +++ b/src/core/tasks/url/operators/suspend/queries/insert.py @@ -0,0 +1,24 @@ +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.suspend.queries.get.response import GetURLsForSuspensionResponse +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.queries.base.builder import QueryBuilderBase + + +class InsertURLSuspensionsQueryBuilder(QueryBuilderBase): + + def __init__(self, responses: list[GetURLsForSuspensionResponse]): + super().__init__() + self.responses = responses + + async def run(self, session: AsyncSession) -> Any: + models: list[FlagURLSuspended] = [] + for response in self.responses: + models.append( + FlagURLSuspended( + url_id=response.url_id, + ) + ) + session.add_all(models) diff --git a/src/core/tasks/url/operators/suspend/queries/prereq.py b/src/core/tasks/url/operators/suspend/queries/prereq.py new file mode 100644 index 00000000..416d68f6 --- /dev/null +++ b/src/core/tasks/url/operators/suspend/queries/prereq.py @@ -0,0 +1,12 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.suspend.queries.cte import GetURLsForSuspensionCTEContainer +from src.db.helpers.session import session_helper as sh +from src.db.queries.base.builder import QueryBuilderBase + + +class GetURLsForSuspensionPrerequisitesQueryBuilder(QueryBuilderBase): + + async def 
run(self, session: AsyncSession) -> bool: + cte = GetURLsForSuspensionCTEContainer() + return await sh.results_exist(session=session, query=cte.query) diff --git a/src/db/enums.py b/src/db/enums.py index af2b02a7..560549a0 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -51,6 +51,7 @@ class TaskType(PyEnum): LOCATION_ID = "Location ID" AUTO_VALIDATE = "Auto Validate" AUTO_NAME = "Auto Name" + SUSPEND_URLS = "Suspend URLs" # Scheduled Tasks PUSH_TO_HUGGINGFACE = "Push to Hugging Face" diff --git a/src/db/models/impl/flag/url_suspended/__init__.py b/src/db/models/impl/flag/url_suspended/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/flag/url_suspended/sqlalchemy.py b/src/db/models/impl/flag/url_suspended/sqlalchemy.py new file mode 100644 index 00000000..dea3f0b0 --- /dev/null +++ b/src/db/models/impl/flag/url_suspended/sqlalchemy.py @@ -0,0 +1,17 @@ +from sqlalchemy import PrimaryKeyConstraint + +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class FlagURLSuspended( + Base, + URLDependentMixin, + CreatedAtMixin +): + + __tablename__ = "flag_url_suspended" + + __table_args__ = ( + PrimaryKeyConstraint("url_id"), + ) \ No newline at end of file diff --git a/src/db/models/impl/link/url_new_agency_suggestion/sqlalchemy.py b/src/db/models/impl/link/url_new_agency_suggestion/sqlalchemy.py deleted file mode 100644 index fe5daf35..00000000 --- a/src/db/models/impl/link/url_new_agency_suggestion/sqlalchemy.py +++ /dev/null @@ -1,19 +0,0 @@ -from sqlalchemy import Column, Integer, ForeignKey, PrimaryKeyConstraint -from sqlalchemy.orm import Mapped - -from src.db.models.mixins import URLDependentMixin -from src.db.models.templates_.base import Base - - -class LinkURLNewAgencySuggestion( - Base, - URLDependentMixin, -): - - __tablename__ = 'link_url_new_agency_suggestion' - - suggestion_id: Mapped[int] = Column(Integer, 
ForeignKey('new_agency_suggestions.id'), nullable=False) - - __table_args__ = ( - PrimaryKeyConstraint('url_id', 'suggestion_id'), - ) diff --git a/src/db/models/impl/link/user_suggestion_not_found/__init__.py b/src/db/models/impl/link/user_suggestion_not_found/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/link/user_suggestion_not_found/agency/__init__.py b/src/db/models/impl/link/user_suggestion_not_found/agency/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/link/user_suggestion_not_found/agency/sqlalchemy.py b/src/db/models/impl/link/user_suggestion_not_found/agency/sqlalchemy.py new file mode 100644 index 00000000..0092f504 --- /dev/null +++ b/src/db/models/impl/link/user_suggestion_not_found/agency/sqlalchemy.py @@ -0,0 +1,20 @@ +from sqlalchemy import PrimaryKeyConstraint +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base +from src.util.alembic_helpers import user_id_column + + +class LinkUserSuggestionAgencyNotFound( + Base, + URLDependentMixin, + CreatedAtMixin, +): + __tablename__ = "link_user_suggestion_agency_not_found" + + user_id: Mapped[int] = user_id_column() + + __table_args__ = ( + PrimaryKeyConstraint("url_id", "user_id"), + ) \ No newline at end of file diff --git a/src/db/models/impl/link/user_suggestion_not_found/location/__init__.py b/src/db/models/impl/link/user_suggestion_not_found/location/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/link/user_suggestion_not_found/location/sqlalchemy.py b/src/db/models/impl/link/user_suggestion_not_found/location/sqlalchemy.py new file mode 100644 index 00000000..d608b04d --- /dev/null +++ b/src/db/models/impl/link/user_suggestion_not_found/location/sqlalchemy.py @@ -0,0 +1,20 @@ +from sqlalchemy import PrimaryKeyConstraint +from sqlalchemy.orm import Mapped + +from 
src.db.models.mixins import URLDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base +from src.util.alembic_helpers import user_id_column + + +class LinkUserSuggestionLocationNotFound( + Base, + URLDependentMixin, + CreatedAtMixin, +): + __tablename__ = "link_user_suggestion_location_not_found" + + user_id: Mapped[int] = user_id_column() + + __table_args__ = ( + PrimaryKeyConstraint("url_id", "user_id"), + ) \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index 7721e80c..38c958ad 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -4,6 +4,7 @@ from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.get.queries.core import GetNextURLForAllAnnotationQueryBuilder from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo +from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.core.enums import RecordType @@ -66,10 +67,12 @@ async def test_annotate_all( suggested_status=URLType.DATA_SOURCE, record_type=RecordType.ACCIDENT_REPORTS, agency_info=AnnotationPostAgencyInfo(agency_ids=[agency_id]), - location_ids=[ - california.location_id, - pennsylvania.location_id, - ], + location_info=AnnotationPostLocationInfo( + location_ids=[ + california.location_id, + pennsylvania.location_id, + ] + ), name_info=AnnotationPostNameInfo( new_name="New Name" ) @@ -85,8 +88,8 @@ async def test_annotate_all( url_id=url_mapping_2.url_id, all_annotations_post_info=AllAnnotationPostInfo( suggested_status=URLType.NOT_RELEVANT, - 
location_ids=[], - agency_info=AnnotationPostAgencyInfo(agency_ids=[]), + location_info=AnnotationPostLocationInfo(), + agency_info=AnnotationPostAgencyInfo(), name_info=AnnotationPostNameInfo( existing_name_id=setup_info_2.name_suggestion_id ) @@ -138,7 +141,7 @@ async def test_annotate_all( ) ) user_suggestions: list[LocationAnnotationUserSuggestion] = \ - response.next_annotation.location_suggestions.user + response.next_annotation.location_suggestions.user.suggestions assert len(user_suggestions) == 2 response_location_ids: list[int] = [location_suggestion.location_id for location_suggestion in user_suggestions] diff --git a/tests/automated/integration/api/annotate/all/test_new_agency.py b/tests/automated/integration/api/annotate/all/test_new_agency.py deleted file mode 100644 index 7a07b3e8..00000000 --- a/tests/automated/integration/api/annotate/all/test_new_agency.py +++ /dev/null @@ -1,64 +0,0 @@ -import pytest - -from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo, \ - AnnotationNewAgencySuggestionInfo -from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo -from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.core.enums import RecordType -from src.db.models.impl.agency.enums import JurisdictionType, AgencyType -from src.db.models.impl.agency.suggestion.sqlalchemy import NewAgencySuggestion -from src.db.models.impl.flag.url_validated.enums import URLType -from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review -from tests.helpers.setup.final_review.model import FinalReviewSetupInfo - - -@pytest.mark.asyncio -async def test_add_new_agency( - api_test_helper, - pennsylvania: USStateCreationInfo, -): - """ - Test the process for adding a new agency - Confirm a new agency suggestion is successfully added in the database. 
- """ - ath = api_test_helper - adb_client = ath.adb_client() - - setup_info_1: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, - include_user_annotations=True - ) - url_mapping_1 = setup_info_1.url_mapping - - post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( - url_id=url_mapping_1.url_id, - all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=URLType.DATA_SOURCE, - record_type=RecordType.ACCIDENT_REPORTS, - agency_info=AnnotationPostAgencyInfo( - new_agency_suggestion=AnnotationNewAgencySuggestionInfo( - name="New Agency", - location_id=pennsylvania.location_id, - jurisdiction_type=JurisdictionType.STATE, - agency_type=AgencyType.LAW_ENFORCEMENT, - ) - ), - location_ids=[ - pennsylvania.location_id, - ], - name_info=AnnotationPostNameInfo( - new_name="New Name" - ) - ) - ) - - # Check for existence of new agency suggestion - - suggestions: list[NewAgencySuggestion] = await adb_client.get_all(NewAgencySuggestion) - assert len(suggestions) == 1 - suggestion: NewAgencySuggestion = suggestions[0] - assert suggestion.name == "New Agency" - assert suggestion.location_id == pennsylvania.location_id - assert suggestion.jurisdiction_type == JurisdictionType.STATE - assert suggestion.agency_type == AgencyType.LAW_ENFORCEMENT \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/all/test_not_found.py b/tests/automated/integration/api/annotate/all/test_not_found.py new file mode 100644 index 00000000..251b4c0e --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_not_found.py @@ -0,0 +1,48 @@ +import pytest + +from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo +from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo +from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo +from src.api.endpoints.annotate.all.post.models.request 
import AllAnnotationPostInfo +from src.core.enums import RecordType +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_not_found( + api_test_helper, +): + """ + Test that marking a URL as agency or location not found works. + """ + ath = api_test_helper + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=True + ) + + post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=setup_info_1.url_mapping.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + suggested_status=URLType.DATA_SOURCE, + record_type=RecordType.ACCIDENT_REPORTS, + agency_info=AnnotationPostAgencyInfo(not_found=True), + location_info=AnnotationPostLocationInfo( + not_found=True, + ), + name_info=AnnotationPostNameInfo( + new_name="New Name" + ) + ) + ) + + adb_client: AsyncDatabaseClient = ath.adb_client() + + not_found_agencies: list[LinkUserSuggestionAgencyNotFound] = await adb_client.get_all(LinkUserSuggestionAgencyNotFound) + assert len(not_found_agencies) == 1 + + not_found_locations: list[LinkUserSuggestionLocationNotFound] = await adb_client.get_all(LinkUserSuggestionLocationNotFound) + assert len(not_found_locations) == 1 \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py b/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py index fc34273f..a770329d 100644 --- a/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py +++ 
b/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py @@ -1,6 +1,7 @@ import pytest from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo +from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -31,8 +32,8 @@ async def test_annotate_all_post_batch_filtering(api_test_helper): batch_id=setup_info_3.batch_id, all_annotations_post_info=AllAnnotationPostInfo( suggested_status=URLType.NOT_RELEVANT, - location_ids=[], - agency_info=AnnotationPostAgencyInfo(agency_ids=[]) + location_info=AnnotationPostLocationInfo(), + agency_info=AnnotationPostAgencyInfo() ) ) diff --git a/tests/automated/integration/api/annotate/all/test_suspended_url.py b/tests/automated/integration/api/annotate/all/test_suspended_url.py new file mode 100644 index 00000000..3eed8699 --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_suspended_url.py @@ -0,0 +1,29 @@ +import pytest + +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_annotate_all( + api_test_helper, +): + """ + Test that a suspended URL is not returned for annotation. 
+ """ + ath = api_test_helper + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=True + ) + + get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_1.next_annotation is not None + + adb_client = ath.adb_client() + await adb_client.add( + FlagURLSuspended( + url_id=setup_info_1.url_mapping.url_id, + ) + ) + get_response_2 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_2.next_annotation is None \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/all/test_validation_error.py b/tests/automated/integration/api/annotate/all/test_validation_error.py index d50eca2f..db9e336a 100644 --- a/tests/automated/integration/api/annotate/all/test_validation_error.py +++ b/tests/automated/integration/api/annotate/all/test_validation_error.py @@ -1,6 +1,7 @@ import pytest from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo +from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.core.enums import RecordType from src.core.exceptions import FailedValidationException @@ -25,7 +26,7 @@ async def test_annotate_all_validation_error(api_test_helper): all_annotations_post_info=AllAnnotationPostInfo( suggested_status=URLType.NOT_RELEVANT, record_type=RecordType.ACCIDENT_REPORTS, - location_ids=[], - agency_info=AnnotationPostAgencyInfo(agency_ids=[]) + location_info=AnnotationPostLocationInfo(), + agency_info=AnnotationPostAgencyInfo() ) ) diff --git a/tests/automated/integration/tasks/url/impl/suspend/__init__.py b/tests/automated/integration/tasks/url/impl/suspend/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/impl/suspend/test_core.py 
b/tests/automated/integration/tasks/url/impl/suspend/test_core.py new file mode 100644 index 00000000..9e1f57d8 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/suspend/test_core.py @@ -0,0 +1,50 @@ +import pytest + +from src.core.tasks.url.operators.suspend.core import SuspendURLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_suspend_task( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, +): + operator = SuspendURLTaskOperator( + adb_client=adb_client_test + ) + + assert not await operator.meets_task_prerequisites() + + url_id_1: int = (await db_data_creator.create_urls(count=1))[0].url_id + + assert not await operator.meets_task_prerequisites() + + await db_data_creator.not_found_location_suggestion(url_id=url_id_1) + + assert not await operator.meets_task_prerequisites() + + await db_data_creator.not_found_location_suggestion(url_id=url_id_1) + + assert await operator.meets_task_prerequisites() + + await run_task_and_confirm_success(operator) + + url_id_2: int = (await db_data_creator.create_urls(count=1))[0].url_id + + await db_data_creator.not_found_agency_suggestion(url_id=url_id_2) + + assert not await operator.meets_task_prerequisites() + + await db_data_creator.not_found_agency_suggestion(url_id=url_id_2) + + assert await operator.meets_task_prerequisites() + + await run_task_and_confirm_success(operator) + + flags: list[FlagURLSuspended] = await adb_client_test.get_all(FlagURLSuspended) + assert len(flags) == 2 + + assert {flag.url_id for flag in flags} == {url_id_1, url_id_2} \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/loader/test_happy_path.py b/tests/automated/integration/tasks/url/loader/test_happy_path.py index 
61dbb8c1..a7b02e89 100644 --- a/tests/automated/integration/tasks/url/loader/test_happy_path.py +++ b/tests/automated/integration/tasks/url/loader/test_happy_path.py @@ -2,7 +2,7 @@ from src.core.tasks.url.loader import URLTaskOperatorLoader -NUMBER_OF_TASK_OPERATORS: int = 13 +NUMBER_OF_TASK_OPERATORS: int = 14 @pytest.mark.asyncio async def test_happy_path( diff --git a/tests/automated/unit/api/test_all_annotation_post_info.py b/tests/automated/unit/api/test_all_annotation_post_info.py index b19eb1b8..cb7bdb41 100644 --- a/tests/automated/unit/api/test_all_annotation_post_info.py +++ b/tests/automated/unit/api/test_all_annotation_post_info.py @@ -2,6 +2,7 @@ from pydantic import BaseModel from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo +from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.core.enums import RecordType from src.core.exceptions import FailedValidationException @@ -96,12 +97,12 @@ def test_all_annotation_post_info( suggested_status=params.suggested_status, record_type=params.record_type, agency_info=AnnotationPostAgencyInfo(agency_ids=params.agency_ids), - location_ids=params.location_ids + location_info=AnnotationPostLocationInfo(location_ids=params.location_ids) ) else: AllAnnotationPostInfo( suggested_status=params.suggested_status, record_type=params.record_type, agency_info=AnnotationPostAgencyInfo(agency_ids=params.agency_ids), - location_ids=params.location_ids + location_info=AnnotationPostLocationInfo(location_ids=params.location_ids) ) \ No newline at end of file diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index b8cc936b..ea58562b 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -21,6 +21,8 @@ from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from 
src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML @@ -706,3 +708,23 @@ async def user_name_endorsement( user_id=user_id, ) await self.adb_client.add(link) + + async def not_found_location_suggestion( + self, + url_id: int, + ) -> None: + suggestion = LinkUserSuggestionLocationNotFound( + url_id=url_id, + user_id=next_int(), + ) + await self.adb_client.add(suggestion) + + async def not_found_agency_suggestion( + self, + url_id: int, + ) -> None: + suggestion = LinkUserSuggestionAgencyNotFound( + url_id=url_id, + user_id=next_int(), + ) + await self.adb_client.add(suggestion) \ No newline at end of file