diff --git a/alembic/versions/2026_02_27_1200-c4f9bbf8a201_add_missing_location_agency_status.py b/alembic/versions/2026_02_27_1200-c4f9bbf8a201_add_missing_location_agency_status.py new file mode 100644 index 00000000..6a4fd0d3 --- /dev/null +++ b/alembic/versions/2026_02_27_1200-c4f9bbf8a201_add_missing_location_agency_status.py @@ -0,0 +1,191 @@ +"""Add missing location/agency URL status + +Revision ID: c4f9bbf8a201 +Revises: 1fb2286a016c +Create Date: 2026-02-27 12:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "c4f9bbf8a201" +down_revision: Union[str, None] = "1fb2286a016c" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_url_status_mat_view() -> None: + op.execute( + """ + CREATE MATERIALIZED VIEW url_status_mat_view AS + WITH + urls_with_relevant_errors AS ( + SELECT + ute.url_id + FROM + url_task_error ute + WHERE + ute.task_type = ANY ( + ARRAY[ + 'Screenshot'::task_type, + 'HTML'::task_type, + 'URL Probe'::task_type + ] + ) + ), + unresolved_missing_queue AS ( + SELECT + fus.url_id + FROM + flag_url_suspended fus + LEFT JOIN link_user_suggestion_location_not_found luslnf + ON luslnf.url_id = fus.url_id + LEFT JOIN link_user_suggestion_agency_not_found lusanf + ON lusanf.url_id = fus.url_id + GROUP BY + fus.url_id + HAVING + count(luslnf.user_id) >= 2 + OR count(lusanf.user_id) >= 2 + ), + status_text AS ( + SELECT + u.id AS url_id, + CASE + WHEN fuv.type = ANY ( + ARRAY[ + 'not relevant'::url_type, + 'individual record'::url_type, + 'not found'::url_type + ] + ) THEN 'Accepted'::text + WHEN ( + fuv.type = 'data source'::url_type + AND uds.url_id IS NULL + ) OR ( + fuv.type = 'meta url'::url_type + AND udmu.url_id IS NULL + ) THEN 'Awaiting Submission'::text + WHEN ( + fuv.type = 'data source'::url_type + AND uds.url_id IS NOT NULL + ) OR ( + fuv.type = 'meta url'::url_type + AND udmu.url_id IS NOT NULL + ) THEN 'Submitted'::text + WHEN fuv.type IS NULL AND umq.url_id IS NOT NULL THEN 'Missing Location / Agency'::text + WHEN uch.url_id IS NOT NULL + AND uwm.url_id IS NOT NULL + AND us.url_id IS NOT NULL THEN 'Community Labeling'::text + WHEN uwre.url_id IS NOT NULL THEN 'Error'::text + ELSE 'Intake'::text + END AS status + FROM + urls u + LEFT JOIN urls_with_relevant_errors uwre + ON u.id = uwre.url_id + LEFT JOIN url_screenshot us + ON u.id = us.url_id + LEFT JOIN url_compressed_html uch + ON u.id = uch.url_id + LEFT JOIN url_web_metadata uwm + ON u.id = uwm.url_id + LEFT JOIN flag_url_validated fuv + ON u.id = fuv.url_id + LEFT JOIN ds_app_link_meta_url udmu + ON u.id = udmu.url_id + LEFT JOIN ds_app_link_data_source uds + ON u.id = uds.url_id + LEFT JOIN unresolved_missing_queue umq + ON u.id = umq.url_id + ) + SELECT + status_text.url_id, + status_text.status, + CASE status_text.status + WHEN 'Intake'::text THEN 100 + WHEN 'Error'::text THEN 110 + WHEN 'Community Labeling'::text THEN 200 + WHEN 'Accepted'::text THEN 300 + WHEN 'Missing Location / Agency'::text THEN 320 + WHEN 'Awaiting Submission'::text THEN 380 + WHEN 'Submitted'::text THEN 390 + ELSE '-1'::integer + END AS code + FROM + status_text; + """ + ) + + +def upgrade() -> None: + op.execute("DROP MATERIALIZED VIEW IF EXISTS url_status_mat_view") + _create_url_status_mat_view() + + +def downgrade() -> None: + op.execute("DROP MATERIALIZED VIEW IF EXISTS url_status_mat_view") + op.execute( + """ + CREATE MATERIALIZED VIEW url_status_mat_view AS + WITH + urls_with_relevant_errors AS ( + SELECT + ute.url_id + FROM + url_task_error ute + WHERE + ute.task_type = ANY (ARRAY ['Screenshot'::task_type, 'HTML'::task_type, 'URL Probe'::task_type]) + ), + status_text AS ( + SELECT + u.id AS url_id, + CASE + WHEN fuv.type = ANY ( + ARRAY['not relevant'::url_type, 'individual record'::url_type, 'not found'::url_type] + ) THEN 'Accepted'::text + WHEN fuv.type = 'data source'::url_type AND uds.url_id IS NULL OR + fuv.type = 'meta url'::url_type AND udmu.url_id IS NULL THEN 'Awaiting Submission'::text + WHEN fuv.type = 'data source'::url_type AND uds.url_id IS NOT NULL OR + fuv.type = 'meta url'::url_type AND udmu.url_id IS NOT NULL THEN 'Submitted'::text + WHEN uch.url_id IS NOT NULL AND uwm.url_id IS NOT NULL AND us.url_id IS NOT NULL + THEN 'Community Labeling'::text + WHEN uwre.url_id IS NOT NULL THEN 'Error'::text + ELSE 'Intake'::text + END AS status + FROM + urls u + LEFT JOIN urls_with_relevant_errors uwre + ON u.id = uwre.url_id + LEFT JOIN url_screenshot us + ON u.id = us.url_id + LEFT JOIN url_compressed_html uch + ON u.id = uch.url_id + LEFT JOIN url_web_metadata uwm + ON u.id = uwm.url_id + LEFT JOIN flag_url_validated fuv + ON u.id = fuv.url_id + LEFT JOIN ds_app_link_meta_url udmu + ON u.id = udmu.url_id + LEFT JOIN ds_app_link_data_source uds + ON u.id = uds.url_id + ) + SELECT + status_text.url_id, + status_text.status, + CASE status_text.status + WHEN 'Intake'::text THEN 100 + WHEN 'Error'::text THEN 110 + WHEN 'Community Labeling'::text THEN 200 + WHEN 'Accepted'::text THEN 300 + WHEN 'Awaiting Submission'::text THEN 380 + WHEN 'Submitted'::text THEN 390 + ELSE '-1'::integer + END AS code + FROM + status_text; + """ + ) diff --git a/src/api/endpoints/annotate/missing/__init__.py b/src/api/endpoints/annotate/missing/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/api/endpoints/annotate/missing/__init__.py @@ -0,0 +1 @@ + diff --git a/src/api/endpoints/annotate/missing/get/__init__.py b/src/api/endpoints/annotate/missing/get/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/api/endpoints/annotate/missing/get/__init__.py @@ -0,0 +1 @@ + diff --git a/src/api/endpoints/annotate/missing/get/models.py b/src/api/endpoints/annotate/missing/get/models.py new file mode 100644 index 00000000..9e9c4796 --- /dev/null +++ b/src/api/endpoints/annotate/missing/get/models.py @@ -0,0 +1,20 @@ +from pydantic import BaseModel, Field + + +class MissingAnnotationQueueEntry(BaseModel): + url_id: int = Field( + description="The URL ID in the missing location/agency queue." + ) + url: str = Field( + description="The URL string." + ) + missing_location_count: int = Field( + description="Number of users who marked the location as missing." + ) + missing_agency_count: int = Field( + description="Number of users who marked the agency as missing." + ) + + +class MissingAnnotationQueueResponse(BaseModel): + entries: list[MissingAnnotationQueueEntry] diff --git a/src/api/endpoints/annotate/missing/get/query.py b/src/api/endpoints/annotate/missing/get/query.py new file mode 100644 index 00000000..f443b711 --- /dev/null +++ b/src/api/endpoints/annotate/missing/get/query.py @@ -0,0 +1,86 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping, func, or_ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.annotate.missing.get.models import MissingAnnotationQueueEntry, MissingAnnotationQueueResponse +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.views.unvalidated_url import UnvalidatedURL +from src.db.queries.base.builder import QueryBuilderBase + + +class GetMissingAnnotationQueueQueryBuilder(QueryBuilderBase): + + def __init__( + self, + limit: int = 200, + ): + super().__init__() + self.limit = limit + + async def run( + self, + session: AsyncSession + ) -> MissingAnnotationQueueResponse: + location_count_subquery = ( + select( + func.count(LinkUserSuggestionLocationNotFound.user_id) + ) + .where( + LinkUserSuggestionLocationNotFound.url_id == URL.id + ) + .scalar_subquery() + ) + + agency_count_subquery = ( + select( + func.count(LinkUserSuggestionAgencyNotFound.user_id) + ) + .where( + LinkUserSuggestionAgencyNotFound.url_id == URL.id + ) + .scalar_subquery() + ) + + query = ( + select( + URL.id.label("url_id"), + URL.full_url.label("url"), + location_count_subquery.label("missing_location_count"), + agency_count_subquery.label("missing_agency_count"), + ) + .join( + FlagURLSuspended, + FlagURLSuspended.url_id == URL.id, + ) + .join( + UnvalidatedURL, + UnvalidatedURL.url_id == URL.id, + ) + .where( + or_( + location_count_subquery >= 2, + agency_count_subquery >= 2, + ) + ) + .order_by(URL.id.asc()) + .limit(self.limit) + ) + + mappings: Sequence[RowMapping] = await sh.mappings( + session=session, + query=query, + ) + + return MissingAnnotationQueueResponse( + entries=[ + MissingAnnotationQueueEntry( + **mapping, + ) + for mapping in mappings + ] + ) diff --git a/src/api/endpoints/annotate/missing/post/__init__.py b/src/api/endpoints/annotate/missing/post/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/api/endpoints/annotate/missing/post/__init__.py @@ -0,0 +1 @@ + diff --git a/src/api/endpoints/annotate/missing/post/models.py b/src/api/endpoints/annotate/missing/post/models.py new file mode 100644 index 00000000..3f7b81b3 --- /dev/null +++ b/src/api/endpoints/annotate/missing/post/models.py @@ -0,0 +1,6 @@ +from src.api.shared.models.request_base import RequestBase + + +class ResolveMissingAnnotationRequest(RequestBase): + location_id: int + agency_id: int diff --git a/src/api/endpoints/annotate/missing/post/query.py b/src/api/endpoints/annotate/missing/post/query.py new file mode 100644 index 00000000..5ee9b7ba --- /dev/null +++ b/src/api/endpoints/annotate/missing/post/query.py @@ -0,0 +1,87 @@ +from sqlalchemy import delete, select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.annotate.missing.post.models import ResolveMissingAnnotationRequest +from src.api.shared.models.message_response import MessageResponse +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from src.db.queries.base.builder import QueryBuilderBase + + +class ResolveMissingAnnotationQueryBuilder(QueryBuilderBase): + + def __init__( + self, + user_id: int, + url_id: int, + post_info: ResolveMissingAnnotationRequest, + ): + super().__init__() + self.user_id = user_id + self.url_id = url_id + self.post_info = post_info + + async def run( + self, + session: AsyncSession + ) -> MessageResponse: + existing_location_annotation = ( + await session.execute( + select(AnnotationLocationUser) + .where( + AnnotationLocationUser.url_id == self.url_id, + AnnotationLocationUser.user_id == self.user_id, + AnnotationLocationUser.location_id == self.post_info.location_id, + ) + ) + ).scalar_one_or_none() + if existing_location_annotation is None: + session.add( + AnnotationLocationUser( + url_id=self.url_id, + user_id=self.user_id, + location_id=self.post_info.location_id, + ) + ) + + existing_agency_annotation = ( + await session.execute( + select(AnnotationAgencyUser) + .where( + AnnotationAgencyUser.url_id == self.url_id, + AnnotationAgencyUser.user_id == self.user_id, + AnnotationAgencyUser.agency_id == self.post_info.agency_id, + ) + ) + ).scalar_one_or_none() + if existing_agency_annotation is None: + session.add( + AnnotationAgencyUser( + url_id=self.url_id, + user_id=self.user_id, + agency_id=self.post_info.agency_id, + ) + ) + + await session.execute( + delete(LinkUserSuggestionLocationNotFound).where( + LinkUserSuggestionLocationNotFound.url_id == self.url_id + ) + ) + await session.execute( + delete(LinkUserSuggestionAgencyNotFound).where( + LinkUserSuggestionAgencyNotFound.url_id == self.url_id + ) + ) + await session.execute( + delete(FlagURLSuspended).where( + FlagURLSuspended.url_id == self.url_id + ) + ) + + return MessageResponse( + message="Missing location/agency annotations resolved." + ) diff --git a/src/api/endpoints/annotate/routes.py b/src/api/endpoints/annotate/routes.py index 0af2afcb..c941ab0a 100644 --- a/src/api/endpoints/annotate/routes.py +++ b/src/api/endpoints/annotate/routes.py @@ -9,6 +9,10 @@ from src.api.endpoints.annotate.all.get.queries.agency.core import GetAgencySuggestionsQueryBuilder from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.api.endpoints.annotate.all.post.query import AddAllAnnotationsToURLQueryBuilder +from src.api.endpoints.annotate.missing.get.models import MissingAnnotationQueueResponse +from src.api.endpoints.annotate.missing.get.query import GetMissingAnnotationQueueQueryBuilder +from src.api.endpoints.annotate.missing.post.models import ResolveMissingAnnotationRequest +from src.api.endpoints.annotate.missing.post.query import ResolveMissingAnnotationQueryBuilder from src.api.endpoints.annotate.anonymous.get.query import GetNextURLForAnonymousAnnotationQueryBuilder from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.api.endpoints.annotate.anonymous.post.query import AddAnonymousAnnotationsToURLQueryBuilder @@ -146,3 +150,31 @@ async def get_agency_suggestions( ) ) + +@annotate_router.get("/missing") +async def get_missing_annotations_queue( + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_standard_user_access_info), + limit: int = Query(default=200, ge=1, le=1000), +) -> MissingAnnotationQueueResponse: + return await async_core.adb_client.run_query_builder( + GetMissingAnnotationQueueQueryBuilder( + limit=limit + ) + ) + + +@annotate_router.post("/missing/{url_id}") +async def resolve_missing_annotation( + url_id: int, + post_info: ResolveMissingAnnotationRequest, + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_standard_user_access_info), +) -> MessageResponse: + return await async_core.adb_client.run_query_builder( + ResolveMissingAnnotationQueryBuilder( + user_id=access_info.user_id, + url_id=url_id, + post_info=post_info, + ) + ) diff --git a/src/db/models/materialized_views/url_status/enums.py b/src/db/models/materialized_views/url_status/enums.py index a467a33d..e5de8145 100644 --- a/src/db/models/materialized_views/url_status/enums.py +++ b/src/db/models/materialized_views/url_status/enums.py @@ -4,7 +4,8 @@ class URLStatusViewEnum(Enum): INTAKE = "Intake" ACCEPTED = "Accepted" + MISSING_LOCATION_AGENCY = "Missing Location / Agency" AWAITING_SUBMISSION = "Awaiting Submission" SUBMITTED = "Submitted" ERROR = "Error" - COMMUNITY_LABELING = "Community Labeling" \ No newline at end of file + COMMUNITY_LABELING = "Community Labeling" diff --git a/tests/automated/integration/api/annotate/missing/__init__.py b/tests/automated/integration/api/annotate/missing/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/automated/integration/api/annotate/missing/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/automated/integration/api/annotate/missing/test_core.py b/tests/automated/integration/api/annotate/missing/test_core.py new file mode 100644 index 00000000..dd28afba --- /dev/null +++ b/tests/automated/integration/api/annotate/missing/test_core.py @@ -0,0 +1,141 @@ +import pytest +from sqlalchemy import select + +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from tests.automated.integration.conftest import MOCK_USER_ID +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo + + +@pytest.mark.asyncio +async def test_get_missing_annotations_queue( + api_test_helper, +): + ath = api_test_helper + + url_ids = [mapping.url_id for mapping in await ath.db_data_creator.create_urls(count=3)] + qualifying_url_id = url_ids[0] + unsuspended_url_id = url_ids[1] + not_seconded_url_id = url_ids[2] + + await ath.db_data_creator.not_found_location_suggestion(url_id=qualifying_url_id) + await ath.db_data_creator.not_found_location_suggestion(url_id=qualifying_url_id) + await ath.db_data_creator.not_found_agency_suggestion(url_id=qualifying_url_id) + await ath.db_data_creator.not_found_agency_suggestion(url_id=qualifying_url_id) + await ath.adb_client().add( + FlagURLSuspended( + url_id=qualifying_url_id + ) + ) + + await ath.db_data_creator.not_found_location_suggestion(url_id=unsuspended_url_id) + await ath.db_data_creator.not_found_location_suggestion(url_id=unsuspended_url_id) + + await ath.db_data_creator.not_found_agency_suggestion(url_id=not_seconded_url_id) + await ath.adb_client().add( + FlagURLSuspended( + url_id=not_seconded_url_id + ) + ) + + response = ath.request_validator.get( + url="/annotate/missing" + ) + entries = response["entries"] + + assert len(entries) == 1 + assert entries[0]["url_id"] == qualifying_url_id + assert entries[0]["missing_location_count"] == 2 + assert entries[0]["missing_agency_count"] == 2 + + +@pytest.mark.asyncio +async def test_resolve_missing_annotations( + api_test_helper, + pittsburgh_locality: LocalityCreationInfo, + test_agency_id: int, +): + ath = api_test_helper + url_id = (await ath.db_data_creator.create_urls(count=1))[0].url_id + + await ath.db_data_creator.not_found_location_suggestion(url_id=url_id) + await ath.db_data_creator.not_found_location_suggestion(url_id=url_id) + await ath.db_data_creator.not_found_agency_suggestion(url_id=url_id) + await ath.db_data_creator.not_found_agency_suggestion(url_id=url_id) + await ath.adb_client().add( + FlagURLSuspended( + url_id=url_id + ) + ) + + response = ath.request_validator.post( + url=f"/annotate/missing/{url_id}", + json={ + "location_id": pittsburgh_locality.location_id, + "agency_id": test_agency_id, + } + ) + assert response["message"] == "Missing location/agency annotations resolved." + + queue_response = ath.request_validator.get( + url="/annotate/missing" + ) + assert queue_response["entries"] == [] + + location_annotations: list[AnnotationLocationUser] = await ath.adb_client().get_all(AnnotationLocationUser) + agency_annotations: list[AnnotationAgencyUser] = await ath.adb_client().get_all(AnnotationAgencyUser) + assert len(location_annotations) == 1 + assert len(agency_annotations) == 1 + + location_annotation = location_annotations[0] + agency_annotation = agency_annotations[0] + assert location_annotation.url_id == url_id + assert location_annotation.user_id == MOCK_USER_ID + assert location_annotation.location_id == pittsburgh_locality.location_id + assert agency_annotation.url_id == url_id + assert agency_annotation.user_id == MOCK_USER_ID + assert agency_annotation.agency_id == test_agency_id + + not_found_agency_rows: list[LinkUserSuggestionAgencyNotFound] = await ath.adb_client().get_all( + LinkUserSuggestionAgencyNotFound + ) + not_found_location_rows: list[LinkUserSuggestionLocationNotFound] = await ath.adb_client().get_all( + LinkUserSuggestionLocationNotFound + ) + suspended_rows: list[FlagURLSuspended] = await ath.adb_client().get_all(FlagURLSuspended) + + assert not_found_agency_rows == [] + assert not_found_location_rows == [] + assert suspended_rows == [] + + +@pytest.mark.asyncio +async def test_missing_annotations_status_code( + api_test_helper, +): + ath = api_test_helper + url_id = (await ath.db_data_creator.create_urls(count=1))[0].url_id + + await ath.db_data_creator.not_found_location_suggestion(url_id=url_id) + await ath.db_data_creator.not_found_location_suggestion(url_id=url_id) + await ath.adb_client().add( + FlagURLSuspended( + url_id=url_id + ) + ) + await ath.adb_client().refresh_materialized_views() + + result = await ath.adb_client().execute( + select(URLStatusMaterializedView).where( + URLStatusMaterializedView.url_id == url_id + ) + ) + status_row = result.scalar_one() + + assert status_row.status == URLStatusViewEnum.MISSING_LOCATION_AGENCY.value + assert status_row.code == 320